Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added an explicit override to RMSMappingQuality annotation to enable old code fallback. #6060

Merged
merged 4 commits into from
Jul 31, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import htsjdk.variant.vcf.VCFStandardHeaderLines;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotation;
Expand Down Expand Up @@ -55,6 +57,10 @@ public final class RMSMappingQuality extends InfoFieldAnnotation implements Stan
public static final int NUM_LIST_ENTRIES = 2;
public static final int SUM_OF_SQUARES_INDEX = 0;
public static final int TOTAL_DEPTH_INDEX = 1;
public static final String RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT = "allow-old-rms-mapping-quality-annotation-data";

@Argument(fullName = RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT, doc="Override to allow old RMSMappingQuality annotated VCFs to function", optional=true)
public boolean allowOlderRawKeyValues = false;

@Override
public String getRawKeyName() { return GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY;} //new key for the two-value MQ data to prevent version mismatch catastrophes
Expand Down Expand Up @@ -124,6 +130,14 @@ public Map<String, Object> finalizeRawData(final VariantContext vc, final Varian
rawMQdata = vc.getAttributeAsString(getRawKeyName(), null);
}
else if (vc.hasAttribute(getDeprecatedRawKeyName())) {
if (!allowOlderRawKeyValues) {
throw new UserException.BadInput("Presence of '-"+getDeprecatedRawKeyName()+"' annotation is detected. This GATK version expects key "
+ getRawKeyName() + " with a tuple of sum of squared MQ values and total reads over variant "
+ "genotypes as the value. This could indicate that the provided input was produced with an older version of GATK. " +
"Use the argument '--"+RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT+"' to override and attempt the deprecated MQ calculation. There " +
"may be differences in how newer GATK versions calculate DP and MQ that may result in worse MQ results. Use at your own risk.");
}

rawMQdata = vc.getAttributeAsString(getDeprecatedRawKeyName(), null);
//the original version of ReblockGVCF produces a different MQ format -- try to handle that gracefully here just in case those files go through GenotypeGVCFs
if (vc.hasAttribute("MQ_DP")) {
Expand All @@ -138,7 +152,7 @@ else if (vc.hasAttribute(getDeprecatedRawKeyName())) {
}
else {
logger.warn("MQ annotation data is not properly formatted. This GATK version expects key "
+ getRawKeyName() + " with an long tuple of sum of squared MQ values and total reads over variant "
+ getRawKeyName() + " with a tuple of sum of squared MQ values and total reads over variant "
+ "genotypes as the value. Attempting to use deprecated MQ calculation.");
final long numOfReads = getNumOfReads(vc, null);
rawMQdata = Math.round(Double.parseDouble(rawMQdata)) + "," + numOfReads; //deprecated format was double so it needs to be converted to long
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBImport;
import org.broadinstitute.hellbender.tools.walkers.annotator.RMSMappingQuality;
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
Expand Down Expand Up @@ -308,6 +310,20 @@ public void assertPPsAreStripped(File input, File expected, List<String> extraAr
runGenotypeGVCFSAndAssertSomething(input, expected, extraArgs, VariantContextTestUtils::assertGenotypePosteriorsAttributeWasRemoved, reference);
}

@Test(expectedExceptions = UserException.BadInput.class)
public void assertDeprecatedMQThrowsUserException() {
final File output = createTempFile("genotypegvcf", ".vcf");
// This old gatk3 output file contains the old MQ format
final File inputWithOldArgument = getTestFile( "combined.single.sample.pipeline.gatk3.vcf");
final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(b37_reference_20_21))
.addArgument("V", inputWithOldArgument.getAbsolutePath())
.addOutput(output);

// This is expected to fail because RMSMappingQuality.RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT is not specified to allow old format MQ calculations.
runCommandLine(args);
}

//this test is separate because all the others use old data and ignore the MQ annotations
@Test(dataProvider = "GVCFsWithNewMQFormat")
public void assertNewMQWorks(File input, File expected, Locatable interval, String reference) throws IOException {
Expand All @@ -326,6 +342,7 @@ private void runGenotypeGVCFSAndAssertSomething(String input, File expected, Lis
final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(reference))
.addArgument("V", input)
.addArgument(RMSMappingQuality.RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add an actual test case covering the new argument, instead of just adding it unconditionally to this helper method for the GenotypeGVCFs tests.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

^I will add a test asserting that it spits out a user exception, its worth noting that the GenotypeGVCFs integration tests have a mixture of old and new RMSMappingQuality annotations attached to them, so this behavior has already been tested per se.

.addOutput(output);

additionalArguments.forEach(args::add);
Expand Down