Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed Funcotator VCF output renderer to keep B37 contig names in the VCF output file #8539

Merged
merged 3 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,6 @@ public void apply(final VariantContext variant, final ReadsContext readsContext,
// Get the correct reference for B37/HG19 compliance:
// This is necessary because of the variant transformation that gets applied in VariantWalkerBase::apply.
final ReferenceContext correctReferenceContext = funcotatorEngine.getCorrectReferenceContext(variant, referenceContext);

// Place the variant on our queue to be funcotated:
enqueueAndHandleVariant(variant, correctReferenceContext, featureContext);
}
Expand Down Expand Up @@ -924,7 +923,11 @@ protected void enqueueAndHandleVariant(final VariantContext variant, final Refer

final FuncotationMap funcotationMap = funcotatorEngine.createFuncotationMapForVariant(variant, referenceContext, featureContext);

// Revert the variant's contig name before writing the VCF, but only if it was changed in FuncotatorEngine::getCorrectVariantContextForReference (i.e. the input VCF uses B37 contigs and the reference is hg19); in all other cases the variant is written out unchanged.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is still misleading -- can you make it clear that we don't always want to revert the variant contig name change, only in the very specific case where the input is b37 and the reference is hg19?

// NOTE: this will only revert the variantContext if it was originally changed (only for B37 VCFs)
final VariantContext variantContextForOutput = funcotatorEngine.getCorrectVariantContextForOutput(variant);

// At this point there is only one transcript ID in the funcotation map if canonical or best effect are selected
outputRenderer.write(variant, funcotationMap);
outputRenderer.write(variantContextForOutput, funcotationMap);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ public final class FuncotatorEngine implements AutoCloseable {
*/
private final boolean mustConvertInputContigsToHg19;

/**
* Whether the output variant contigs must be converted back to B37 from hg19 before being returned.
* (NOTE: this means that the output contigs will continue to use B37 contig names even if internally we converted them to hg19)
*/
private boolean mustRevertVariantContigsFromHg19ToB37 = false;

/**
* Whether this {@link FuncotatorEngine} has only produced annotations on variants that have been labeled by the
* {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotationFactory} as {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#IGR}.
Expand Down Expand Up @@ -327,6 +333,22 @@ private VariantContext getCorrectVariantContextForReference(final VariantContext
}
}

/**
 * Undo the B37 -> HG19 contig renaming on a variant so that it can be written to the output with its B37 contig name.
 * The conversion is applied only when {@code mustRevertVariantContigsFromHg19ToB37} is set; in every other case the given variant is returned as-is.
 * @param variant A {@link VariantContext} object containing the variant to convert.
 * @return A new {@link VariantContext} whose contig name has been converted from HG19 to B37 if {@code mustRevertVariantContigsFromHg19ToB37} is {@code true}.  Otherwise, the given variant itself.
 */
VariantContext getCorrectVariantContextForOutput(final VariantContext variant) {
if ( mustRevertVariantContigsFromHg19ToB37 ) {
// Build a new variant from the given one, setting its contig to the B37 name:
final VariantContextBuilder vcb = new VariantContextBuilder(variant);
vcb.chr(FuncotatorUtils.convertHG19ContigToB37Contig(variant.getContig()));
return vcb.make();
jamesemery marked this conversation as resolved.
Show resolved Hide resolved
}
else {
// No contig conversion was recorded as needed, so there is nothing to revert.
return variant;
}
}

/**
* @return The default {@link VariantTransformer} which will automatically convert from the B37 reference standard to the HG19 reference standard for contig names.
*/
Expand Down Expand Up @@ -483,7 +505,7 @@ private boolean determineReferenceAndDatasourceCompatibility() {
}
else if ( funcotatorArgs.referenceVersion.equals(BaseFuncotatorArgumentCollection.FuncotatorReferenceVersionHg19) &&
FuncotatorUtils.isSequenceDictionaryUsingB37Reference(sequenceDictionaryForDrivingVariants) ) {
logger.info("VCF sequence dictionary detected as B37 in HG19 annotation mode. Performing conversion.");
logger.info("VCF sequence dictionary detected as B37 in HG19 annotation mode. Performing conversion. (NOTE: the output VCF will still be B37)");
mustConvertInputContigsToHg19 = true;
}
else {
Expand All @@ -505,6 +527,11 @@ else if ( funcotatorArgs.referenceVersion.equals(BaseFuncotatorArgumentCollectio
"There MAY be some errors (e.g. in the Y chromosome, but possibly in other places as well) due to changes between the two references.");
}

// Record whether we need to revert the contigs back to B37 after annotation:
if (FuncotatorUtils.isSequenceDictionaryUsingB37Reference(sequenceDictionaryForDrivingVariants) && mustConvertInputContigsToHg19) {
jamesemery marked this conversation as resolved.
Show resolved Hide resolved
this.mustRevertVariantContigsFromHg19ToB37 = true;
}

return mustConvertInputContigsToHg19;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ public void write(final VariantContext variant, final FuncotationMap txToFuncota
variantContextOutputBuilder.genotypes( variant.getGenotypes() );

// Render and add our VCF line:
vcfWriter.add( variantContextOutputBuilder.make() );
VariantContext out = variantContextOutputBuilder.make();
vcfWriter.add( out );
}

private Funcotation createManualAnnotationFuncotation(final Allele altAllele) {
Expand Down
Git LFS file not shown
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,4 @@
##reference=/cromwell_root/broad-references/hg19/v0/Homo_sapiens_assembly19.fasta
##source=Funcotator
#CHROM POS ID REF ALT QUAL FILTER INFO
chr2 70120909 rs3214822 GA G 722.12 PASS FUNCOTATION=[SNRNP27|hg19|chr2|70120910|70120910|FIVE_PRIME_UTR||DEL|A|A|-|g.chr2:70120910delA|ENST00000244227.3|+|1|||||0.43640897755610975|GGGAAAAATGAAAGCTGTGTT|SNRNP27_ENST00000409116.1_FIVE_PRIME_FLANK/SNRNP27_ENST00000488986.1_FIVE_PRIME_FLANK|||||||||||||||||||||||||91|biliary_tract(2)_%7C_breast(12)_%7C_central_nervous_system(44)_%7C_large_intestine(11)_%7C_pancreas(22)|||||||X76302|NM_006857.2|NP_006848.1|HGNC:30240|small_%20_nuclear_%20_ribonucleoprotein_%20_U4/U6.U5_%20_subunit_%20_27|Approved|gene_%20_with_%20_protein_%20_product|protein-coding_%20_gene||"small_%20_nuclear_%20_ribonucleoprotein_%20_27kDa_%20_(U4/U6.U5)"_%2C__%20_"small_%20_nuclear_%20_ribonucleoprotein_%2C__%20_U4/U6.U5_%20_27kDa_%20_subunit"|RY1_%2C__%20_U4/U6.U5-27K|"nucleic_%20_acid_%20_binding_%20_protein_%20_RY_%20_1"_%2C__%20_"U4/U6.U5_%20_small_%20_nuclear_%20_ribonucleoprotein_%20_27_%20_kDa_%20_protein"|2p13.3|2016-10-05||2016-03-11|X76302||11017|ENSG00000124380|7931148_%2C__%20_9085842|NM_006857|||CCDS33219|OTTHUMG00000152689|11017||NM_006857|Q8WVK2|ENSG00000124380|uc002sfw.4|SNR27_HUMAN||Q15410|Q8WVK2|mRNA_%20_processing_%20_(GO:0006397)_%7C_RNA_%20_splicing_%20_(GO:0008380)|nucleus_%20_(GO:0005634)|nucleic_%20_acid_%20_binding_%20_(GO:0003676)|_%7C_|_%7C_|_%7C_|true_%7C_true|false_%7C_false|0.6222_%2C_0.3778_%7C_0.6222_%2C_0.3778|false_%7C_false|false_%7C_false|1_%7C_1|_%7C_|false_%7C_false|true_%7C_false|true_%7C_false|SNRNP27:11017_%7C_SNRNP27:11017|true_%7C_false|false_%7C_false|false_%7C_false|true_%7C_false|true_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|true_%7C_true|false_%7C_false|3214822_%7C_397747233|70120910_%7C_70120912|false_%7C_false|false_%7C_false|0_%7C_0|true_%7C_true|0_%7C_0|false_%7C_false|0.661217_%2C_0.338783_%7C_|false_%7C_false|false_%7C_fals
e|false_%7C_false|DIV_%7C_DIV|true_%7C_false|0x05010002000517013e000200_%7C_0x050100020005000002000200|1_%7C_1|false_%7C_false|134_%7C_138|rs3214822_%7C_rs397747233|_%7C_]
2 70120909 rs3214822 GA G 722.12 PASS FUNCOTATION=[SNRNP27|hg19|chr2|70120910|70120910|FIVE_PRIME_UTR||DEL|A|A|-|g.chr2:70120910delA|ENST00000244227.3|+|1|||||0.43640897755610975|GGGAAAAATGAAAGCTGTGTT|SNRNP27_ENST00000409116.1_FIVE_PRIME_FLANK/SNRNP27_ENST00000488986.1_FIVE_PRIME_FLANK|||||||||||||||||||||||||91|biliary_tract(2)_%7C_breast(12)_%7C_central_nervous_system(44)_%7C_large_intestine(11)_%7C_pancreas(22)|||||||X76302|NM_006857.2|NP_006848.1|HGNC:30240|small_%20_nuclear_%20_ribonucleoprotein_%20_U4/U6.U5_%20_subunit_%20_27|Approved|gene_%20_with_%20_protein_%20_product|protein-coding_%20_gene||"small_%20_nuclear_%20_ribonucleoprotein_%20_27kDa_%20_(U4/U6.U5)"_%2C__%20_"small_%20_nuclear_%20_ribonucleoprotein_%2C__%20_U4/U6.U5_%20_27kDa_%20_subunit"|RY1_%2C__%20_U4/U6.U5-27K|"nucleic_%20_acid_%20_binding_%20_protein_%20_RY_%20_1"_%2C__%20_"U4/U6.U5_%20_small_%20_nuclear_%20_ribonucleoprotein_%20_27_%20_kDa_%20_protein"|2p13.3|2016-10-05||2016-03-11|X76302||11017|ENSG00000124380|7931148_%2C__%20_9085842|NM_006857|||CCDS33219|OTTHUMG00000152689|11017||NM_006857|Q8WVK2|ENSG00000124380|uc002sfw.4|SNR27_HUMAN||Q15410|Q8WVK2|mRNA_%20_processing_%20_(GO:0006397)_%7C_RNA_%20_splicing_%20_(GO:0008380)|nucleus_%20_(GO:0005634)|nucleic_%20_acid_%20_binding_%20_(GO:0003676)|_%7C_|_%7C_|_%7C_|true_%7C_true|false_%7C_false|0.6222_%2C_0.3778_%7C_0.6222_%2C_0.3778|false_%7C_false|false_%7C_false|1_%7C_1|_%7C_|false_%7C_false|true_%7C_false|true_%7C_false|SNRNP27:11017_%7C_SNRNP27:11017|true_%7C_false|false_%7C_false|false_%7C_false|true_%7C_false|true_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|true_%7C_true|false_%7C_false|3214822_%7C_397747233|70120910_%7C_70120912|false_%7C_false|false_%7C_false|0_%7C_0|true_%7C_true|0_%7C_0|false_%7C_false|0.661217_%2C_0.338783_%7C_|false_%7C_false|false_%7C_false|f
alse_%7C_false|DIV_%7C_DIV|true_%7C_false|0x05010002000517013e000200_%7C_0x050100020005000002000200|1_%7C_1|false_%7C_false|134_%7C_138|rs3214822_%7C_rs397747233|_%7C_]
Loading