Skip to content

Commit

Permalink
Possibly more elegant fix than #8715
Browse files Browse the repository at this point in the history
  • Loading branch information
ldgauthier committed Mar 6, 2024
1 parent b68fadc commit cdb8a9d
Show file tree
Hide file tree
Showing 15 changed files with 5,838 additions and 8,695 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,6 @@ public static double[] makeApproximateDiploidLog10LikelihoodsFromGQ(Genotype g,
}

public static boolean shouldBeCalled(final Genotype g) {
return !g.isNonInformative() || g.hasGQ();
return !g.isNonInformative() || (g.hasGQ() && g.getGQ() > 0);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ public void testCombineReblockedGVCFs() {
Assert.assertFalse(g0.hasPL());
Assert.assertFalse(g0.hasGQ());
Assert.assertFalse(g0.hasExtendedAttribute(VCFConstants.DEPTH_KEY));
Assert.assertTrue(g1.isHomRef());
Assert.assertTrue(g1.isNoCall());
Assert.assertFalse(g1.hasPL());
Assert.assertEquals(g1.getGQ(), 0);
Assert.assertEquals(g1.getAnyAttribute(VCFConstants.DEPTH_KEY), 34);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -822,10 +822,11 @@ public void testWithReblockedGVCF() {
Assert.assertEquals(vc0.getAlternateAllele(0).getBaseString(), "G");
Assert.assertTrue(vc0.getGenotypes().stream().allMatch(g -> g.isCalled() && g.hasGQ() && g.hasDP()));

//reblocked GVCFs with no PLs have genotypes that will be assigned as no-calls because of GQ0, so AN and ExcessHet differ here
final VariantContext vc1 = outputVCs.get(1);
Assert.assertTrue(vc1.getAttributeAsDouble(GATKVCFConstants.EXCESS_HET_KEY, 1000.0) < 10.0); //will be ~72 if homRefs aren't counted
Assert.assertTrue(vc1.getAttributeAsDouble(GATKVCFConstants.EXCESS_HET_KEY, 0.0) > 50.0); //will be ~72 if homRefs aren't counted
Assert.assertTrue(vc1.hasAttribute(GATKVCFConstants.INBREEDING_COEFFICIENT_KEY));
Assert.assertEquals(vc1.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY, 0), 362);
Assert.assertEquals(vc1.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY, 0), 300);
Assert.assertEquals(vc1.getAlternateAlleles().size(), 3);
Assert.assertTrue(vc1.isIndel());
Assert.assertTrue(vc0.getGenotypes().stream().allMatch(g -> g.isCalled() && g.hasGQ() && g.hasDP()));
Expand All @@ -838,7 +839,7 @@ public void testWithReblockedGVCF() {
.addOutput(compareICvalues);
runCommandLine(args3);

//requires InbreedingCoeff to use isCalledAndDiploidWithLikelihoodsOrWithGQ for sampleNum
//highlight differences with and without PLs
final List<VariantContext> compareICvariants = VariantContextTestUtils.getVariantContexts(compareICvalues);
final VariantContext vcWithPLs = compareICvariants.get(0);
final VariantContext vcWithoutPLs = compareICvariants.get(1);
Expand All @@ -848,8 +849,9 @@ public void testWithReblockedGVCF() {
final double ic2 = vcWithoutPLs.getAttributeAsDouble(GATKVCFConstants.INBREEDING_COEFFICIENT_KEY, 0);
Assert.assertTrue(ic1 > 0); //make sure lookup worked, otherwise 0 == 0
Assert.assertTrue(ic2 > 0); //if GQ0s with no data are output as hom-ref, then ic2 is ~0.7
Assert.assertEquals(ic1, ic2, 0.1); //there will be some difference because the old version zeros out low depth hom-refs and makes them no-calls
Assert.assertEquals(vcWithoutPLs.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY, 0), 114); //don't count no-calls that are PL=[0,0,0] in classic VCF
Assert.assertTrue(ic1 - ic2 > .25); //there will be a significant difference because with noPLs, GQ0s become no-calls
Assert.assertEquals(vcWithPLs.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY, 0) -
vcWithoutPLs.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY, 0), 16); //don't count no-calls that are PL=[0,0,0] in classic VCF
}

@Test
Expand Down Expand Up @@ -976,4 +978,75 @@ public void dbSNPError() {
//make sure it doesn't throw an error
runCommandLine(args);
}

@Test
public void testNoReadsOutputAsNoCall() {
//The input data for this test comes from a GVCF produced from an empty region of one of the NA12878 test bams
// and a GVCF that was edited to have a variant so we can force that position to be output.
//note these are b37 data
File no_reads = new File(toolsTestDir, "/walkers/GenotypeGVCFs/combine.single.sample.pipeline.1.vcf");
File fake_variant = getTestFile("fake_sample2.vcf");
final SimpleInterval interval = new SimpleInterval("20", 10000000, 10000000);
File tempGdb = GenomicsDBTestUtils.createTempGenomicsDB(Arrays.asList(no_reads, fake_variant), interval);
final String genomicsDBUri = GenomicsDBTestUtils.makeGenomicsDBUri(tempGdb);

final File output = createTempFile("checkNoCall", ".vcf");

final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(b37_reference_20_21)
.addFlag("allow-old-rms-mapping-quality-annotation-data") //old GVCFs have old annotations
.addVCF(genomicsDBUri)
.addInterval(interval)
.addOutput(output);
runCommandLine(args);

final Pair<VCFHeader, List<VariantContext>> outputData = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());
Assert.assertEquals(outputData.getRight().size(), 1);
final VariantContext vc = outputData.getRight().get(0);
Assert.assertEquals(vc.getGenotypes().size(), 2);
final Genotype g = vc.getGenotype("GTEX-RVPV-0003");
Assert.assertTrue(g.isNoCall()); //most importantly!
Assert.assertTrue(g.hasGQ());
Assert.assertEquals(g.getGQ(), 0);
Assert.assertTrue(g.hasDP());
Assert.assertEquals(g.getDP(), 0);
Assert.assertTrue(g.hasAD());
Assert.assertEquals(g.getAD(), new int[2]);
Assert.assertTrue(g.hasPL());
Assert.assertEquals(g.getPL(), new int[3]);
}

@Test
public void testNoReadsReblockedOutputAsNoCall() {
//There's a very similar test for Gnarly, but we expect the outputs to be a bit different here
//note these are b37 data
File no_reads = new File(toolsTestDir, "/walkers/GnarlyGenotyper/testNoReads.rb.g.vcf");
//this is an artisanal, hand-crafted VCF with a QUAL approx that's been artificially enhanced
File fake_variant = new File(toolsTestDir, "/walkers/GnarlyGenotyper/fake_sample2.rb.g.vcf");
final SimpleInterval interval = new SimpleInterval("20", 10000000, 10000000);
File tempGdb = GenomicsDBTestUtils.createTempGenomicsDB(Arrays.asList(no_reads, fake_variant), interval);
final String genomicsDBUri = GenomicsDBTestUtils.makeGenomicsDBUri(tempGdb);

final File output = createTempFile("checkNoCall", ".vcf");

final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(b37_reference_20_21)
.addVCF(genomicsDBUri)
.addInterval(interval)
.addOutput(output);
runCommandLine(args);

final Pair<VCFHeader, List<VariantContext>> outputData = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());
Assert.assertEquals(outputData.getRight().size(), 1);
final VariantContext vc = outputData.getRight().get(0);
Assert.assertEquals(vc.getGenotypes().size(), 2);
final Genotype g = vc.getGenotype("GTEX-RVPV-0003");
Assert.assertTrue(g.isNoCall()); //most importantly!
Assert.assertTrue(g.hasGQ());
Assert.assertEquals(g.getGQ(), 0);
Assert.assertTrue(g.hasDP());
Assert.assertEquals(g.getDP(), 0);
Assert.assertTrue(g.hasAD()); //this is different from Gnarly behavior
Assert.assertFalse(g.hasPL());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -245,4 +245,37 @@ public void testHaploidInput() {
Assert.assertEquals(g.getPL().length, 2);
Assert.assertEquals(g.getPL(), new int[]{83,0});
}

@Test
public void testNoReadsOutputAsNoCall() {
//note these are b37 data
File no_reads = new File(toolsTestDir, "/walkers/GnarlyGenotyper/testNoReads.rb.g.vcf");
//this is an artisanal, hand-crafted VCF with a QUAL approx that's been artificially enhanced
File fake_variant = new File(toolsTestDir, "/walkers/GnarlyGenotyper/fake_sample2.rb.g.vcf");
final SimpleInterval interval = new SimpleInterval("20", 10000000, 10000000);
File tempGdb = GenomicsDBTestUtils.createTempGenomicsDB(Arrays.asList(no_reads, fake_variant), interval);
final String genomicsDBUri = GenomicsDBTestUtils.makeGenomicsDBUri(tempGdb);

final File output = createTempFile("checkNoCall", ".vcf");

final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(b37_reference_20_21)
.addVCF(genomicsDBUri)
.addInterval(interval)
.addOutput(output);
runCommandLine(args);

final Pair<VCFHeader, List<VariantContext>> outputData = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());
Assert.assertEquals(outputData.getRight().size(), 1);
final VariantContext vc = outputData.getRight().get(0);
Assert.assertEquals(vc.getGenotypes().size(), 2);
final Genotype g = vc.getGenotype("GTEX-RVPV-0003");
Assert.assertTrue(g.isNoCall()); //most importantly!
Assert.assertTrue(g.hasGQ());
Assert.assertEquals(g.getGQ(), 0);
Assert.assertTrue(g.hasDP());
Assert.assertEquals(g.getDP(), 0);
Assert.assertFalse(g.hasAD());
Assert.assertFalse(g.hasPL());
}
}
Loading

0 comments on commit cdb8a9d

Please sign in to comment.