Skip to content

Commit

Permalink
Added an explicit override to RMSMappingQuality annotation to enable …
Browse files Browse the repository at this point in the history
…old code fallback. (#6060)
  • Loading branch information
jamesemery authored Jul 31, 2019
1 parent 3450a81 commit baab529
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import htsjdk.variant.vcf.VCFStandardHeaderLines;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotation;
Expand Down Expand Up @@ -55,6 +57,10 @@ public final class RMSMappingQuality extends InfoFieldAnnotation implements Stan
public static final int NUM_LIST_ENTRIES = 2;
public static final int SUM_OF_SQUARES_INDEX = 0;
public static final int TOTAL_DEPTH_INDEX = 1;
public static final String RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT = "allow-old-rms-mapping-quality-annotation-data";

@Argument(fullName = RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT, doc="Override to allow old RMSMappingQuality annotated VCFs to function", optional=true)
public boolean allowOlderRawKeyValues = false;

@Override
public String getRawKeyName() { return GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY;} //new key for the two-value MQ data to prevent version mismatch catastrophes
Expand Down Expand Up @@ -124,6 +130,14 @@ public Map<String, Object> finalizeRawData(final VariantContext vc, final Varian
rawMQdata = vc.getAttributeAsString(getRawKeyName(), null);
}
else if (vc.hasAttribute(getDeprecatedRawKeyName())) {
if (!allowOlderRawKeyValues) {
throw new UserException.BadInput("Presence of '-"+getDeprecatedRawKeyName()+"' annotation is detected. This GATK version expects key "
+ getRawKeyName() + " with a tuple of sum of squared MQ values and total reads over variant "
+ "genotypes as the value. This could indicate that the provided input was produced with an older version of GATK. " +
"Use the argument '--"+RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT+"' to override and attempt the deprecated MQ calculation. There " +
"may be differences in how newer GATK versions calculate DP and MQ that may result in worse MQ results. Use at your own risk.");
}

rawMQdata = vc.getAttributeAsString(getDeprecatedRawKeyName(), null);
//the original version of ReblockGVCF produces a different MQ format -- try to handle that gracefully here just in case those files go through GenotypeGVCFs
if (vc.hasAttribute("MQ_DP")) {
Expand All @@ -138,7 +152,7 @@ else if (vc.hasAttribute(getDeprecatedRawKeyName())) {
}
else {
logger.warn("MQ annotation data is not properly formatted. This GATK version expects key "
+ getRawKeyName() + " with an long tuple of sum of squared MQ values and total reads over variant "
+ getRawKeyName() + " with a tuple of sum of squared MQ values and total reads over variant "
+ "genotypes as the value. Attempting to use deprecated MQ calculation.");
final long numOfReads = getNumOfReads(vc, null);
rawMQdata = Math.round(Double.parseDouble(rawMQdata)) + "," + numOfReads; //deprecated format was double so it needs to be converted to long
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBImport;
import org.broadinstitute.hellbender.tools.walkers.annotator.RMSMappingQuality;
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
Expand Down Expand Up @@ -308,6 +310,20 @@ public void assertPPsAreStripped(File input, File expected, List<String> extraAr
runGenotypeGVCFSAndAssertSomething(input, expected, extraArgs, VariantContextTestUtils::assertGenotypePosteriorsAttributeWasRemoved, reference);
}

@Test(expectedExceptions = UserException.BadInput.class)
public void assertDeprecatedMQThrowsUserException() {
final File output = createTempFile("genotypegvcf", ".vcf");
// This old gatk3 output file contains the old MQ format
final File inputWithOldArgument = getTestFile( "combined.single.sample.pipeline.gatk3.vcf");
final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(b37_reference_20_21))
.addArgument("V", inputWithOldArgument.getAbsolutePath())
.addOutput(output);

// This is expected to fail because RMSMappingQuality.RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT is not specified to allow old format MQ calculations.
runCommandLine(args);
}

//this test is separate because all the others use old data and ignore the MQ annotations
@Test(dataProvider = "GVCFsWithNewMQFormat")
public void assertNewMQWorks(File input, File expected, Locatable interval, String reference) throws IOException {
Expand All @@ -326,6 +342,7 @@ private void runGenotypeGVCFSAndAssertSomething(String input, File expected, Lis
final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(reference))
.addArgument("V", input)
.addArgument(RMSMappingQuality.RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT)
.addOutput(output);

additionalArguments.forEach(args::add);
Expand Down

0 comments on commit baab529

Please sign in to comment.