Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restore base quality filter code that got reverted in #4895 #5123

Merged
merged 1 commit into from
Aug 31, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ private static void applyPanelOfNormalsFilter(final M2FiltersArgumentCollection

private void applyBaseQualityFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
final int[] baseQualityByAllele = getIntArrayTumorField(vc, BaseQuality.KEY);
if (baseQualityByAllele != null && baseQualityByAllele[0] < MTFAC.minMedianBaseQuality) {
final double[] tumorLods = getDoubleArrayAttribute(vc, GATKVCFConstants.TUMOR_LOD_KEY);
final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);

if (baseQualityByAllele != null && baseQualityByAllele[indexOfMaxTumorLod + 1] < MTFAC.minMedianBaseQuality) {
filterResult.addFilter(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package org.broadinstitute.hellbender.tools.walkers.mutect;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMReadGroupRecord;
import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class M2TestingUtils {
private static final String DEFAULT_READ_GROUP_NAME = "READ_GROUP_1";
/**
*
* Reference at this position looks like this:
* coord.: 99,990 100,000 100,010 100,020
* | | | |
* reference: T C A T C A C A C T C A C T A A G C A C A C A G A G A A T A A T
* alt read: T C A T C A C A C T C T C T G A C C A A A G A G A A T A A T A A
*/
private static final int DEFAULT_ALIGNMENT_START = 99_991;
private static final int DEFAULT_CHROM_INDEX = 0;
private static final byte DEFAULT_BASEQ = 26;
private static final int DEFAULT_MAPQ = 60;
private static final int DEFAULT_NUM_CHROMOSOMES = 1;
private static final int DEFAULT_STARTING_CHROM = 1;
private static final int DEFAULT_CHROMOSOME_SIZE = 1_000_000;
public static final byte[] DEFAULT_REF_BASES = "CATCACACTCACTAAGCACACAGAGAATAAT".getBytes();
// Bases for C->T SNP at position 100,000 *
public static final byte[] DEFAULT_ALT_BASES = "CATCACACTTACTAAGCACACAGAGAATAAT".getBytes();
public static final int DEFAULT_SNP_POSITION = 100_000;
public static final String DEFAULT_SAMPLE_NAME = "sample1";

public static SAMFileGATKReadWriter getBareBonesSamWriter(final File samFile, final SAMFileHeader samHeader) {
return new SAMFileGATKReadWriter(ReadUtils.createCommonSAMWriter(samFile, null, samHeader, true, true, false));
}

// TODO: might have to address read name collisions
public static List<GATKRead> createReads(final int numReads, final byte[] bases, final SAMFileHeader samHeader,
final byte baseQuality){
final int readLength = bases.length;
final byte[] quals = new byte[readLength];
Arrays.fill(quals, baseQuality);

final List<GATKRead> reads = new ArrayList<>(numReads);
for (int i = 0; i < numReads; i++) {
final String cigar = bases.length + "M";
final GATKRead read = ArtificialReadUtils.createArtificialRead(samHeader, "Read" + i, DEFAULT_CHROM_INDEX,
DEFAULT_ALIGNMENT_START, bases, quals, cigar);
read.setReadGroup(DEFAULT_READ_GROUP_NAME);
read.setMappingQuality(DEFAULT_MAPQ);
read.setIsFirstOfPair();
read.setIsReverseStrand(i % 2 == 0);
reads.add(read);
}

return reads;
}

public static List<GATKRead> createReads(final int numReads, final byte[] bases, final SAMFileHeader samHeader){
return createReads(numReads, bases, samHeader, DEFAULT_BASEQ);
}

public static SAMFileHeader createSamHeader(){
final SAMFileHeader samHeader = ArtificialReadUtils.createArtificialSamHeader(
DEFAULT_NUM_CHROMOSOMES, DEFAULT_STARTING_CHROM, DEFAULT_CHROMOSOME_SIZE);
final SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(DEFAULT_READ_GROUP_NAME);
readGroupRecord.setSample(DEFAULT_SAMPLE_NAME);
samHeader.addReadGroup(readGroupRecord);
return samHeader;
}

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.tools.walkers.mutect;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SamFiles;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
Expand All @@ -18,18 +19,17 @@
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.VariantContextTestUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import scala.tools.nsc.transform.patmat.ScalaLogic;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

Expand Down Expand Up @@ -573,6 +573,45 @@ public void testBamoutVariations(final boolean createBamout, final boolean creat
);
}

@Test()
public void testBaseQualityFilter() throws IOException {
// Create a test sam file
final File samFile = File.createTempFile("synthetic", ".bam");
final SAMFileHeader samHeader = M2TestingUtils.createSamHeader();
final SAMFileGATKReadWriter writer = M2TestingUtils.getBareBonesSamWriter(samFile, samHeader);

final byte poorQuality = 10;
final byte goodQuality = 30;
final int numReads = 20;
final List<GATKRead> refReads = M2TestingUtils.createReads(numReads, M2TestingUtils.DEFAULT_REF_BASES, samHeader, poorQuality);
final List<GATKRead> alt1Reads = M2TestingUtils.createReads(numReads, M2TestingUtils.DEFAULT_ALT_BASES, samHeader, goodQuality);

refReads.forEach(writer::addRead);
alt1Reads.forEach(writer::addRead);
writer.close(); // closing the writer writes to the file
// End creating sam file

final File unfilteredVcf = File.createTempFile("unfiltered", ".vcf");
final List<String> args = Arrays.asList(
"-I", samFile.getAbsolutePath(),
"-" + M2ArgumentCollection.TUMOR_SAMPLE_SHORT_NAME, M2TestingUtils.DEFAULT_SAMPLE_NAME,
"-R", hg19_chr1_1M_Reference,
"-O", unfilteredVcf.getAbsolutePath());
runCommandLine(args);

final File filteredVcf = File.createTempFile("filtered", ".vcf");
final String[] filteringArgs = makeCommandLineArgs(Arrays.asList(
"-V", unfilteredVcf.getAbsolutePath(),
"-O", filteredVcf.getAbsolutePath()),
FilterMutectCalls.class.getSimpleName());
new Main().instanceMain(filteringArgs);

final Optional<VariantContext> vc = VariantContextTestUtils.streamVcf(filteredVcf).findAny();
Assert.assertTrue(vc.isPresent());
Assert.assertEquals(vc.get().getStart(), M2TestingUtils.DEFAULT_SNP_POSITION);
Assert.assertFalse(vc.get().getFilters().contains(GATKVCFConstants.MEDIAN_BASE_QUALITY_FILTER_NAME));
}

private void doMutect2Test(
final String inputBam,
final String tumorSample,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.IOUtil;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.tools.walkers.mutect.M2TestingUtils;
import org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList;
import org.broadinstitute.hellbender.utils.genotyper.SampleList;
import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
Expand Down Expand Up @@ -64,8 +65,8 @@ public void testOnSyntheticBam() throws IOException {
*
* coord.: 99,990 100,000 100,010 100,020
* | | | |
* reference: | T C A T C A C A C T C A C T A A G C A C A C A G A G A A T A A T
* alt read: | T C A T C A C A C T C T C T G A C C A A A G A G A A T A A T A A
* reference: T C A T C A C A C T C A C T A A G C A C A C A G A G A A T A A T
* alt read: T C A T C A C A C T C T C T G A C C A A A G A G A A T A A T A A
* * * * *
*
* At each alt site, we have 75 ref and 25 alt reads
Expand Down Expand Up @@ -157,7 +158,6 @@ public void testHistograms() throws IOException {
final File altTable = createTempFile("alt", ".table");
final File sam = createSyntheticSam(30, 1);


final String[] args = {
"-R", hg19_chr1_1M_Reference,
"-I", sam.getAbsolutePath(),
Expand Down Expand Up @@ -189,50 +189,16 @@ public void testHistograms() throws IOException {
}

private File createSyntheticSam(final int refDepth, final int altDepth) throws IOException {
// create a sam header
final int alignmentStart = 99_991;
final int numChromosomes = 1;
final int startingChromosome = 1;
final int chromosomeSize = 1_000_000;
final String readGroupName = "HELLO.1";
final SAMFileHeader samHeader = ArtificialReadUtils.createArtificialSamHeader(
numChromosomes, startingChromosome, chromosomeSize);
samHeader.addReadGroup(new SAMReadGroupRecord(readGroupName));

// create a sample list
final int chromosomeIndex = 0;
final String sampleName = "samthreetree";
final SampleList sampleList = new IndexedSampleList(sampleName);

// specify characteristics of reads
final int depth = altDepth + refDepth;

final byte[] refReadBases = "CATCACACTCACTAAGCACACAGAGAATAAT".getBytes();
final File samFile = File.createTempFile("synthetic", ".bam");
final SAMFileHeader samHeader = M2TestingUtils.createSamHeader();
final SAMFileGATKReadWriter writer = M2TestingUtils.getBareBonesSamWriter(samFile, samHeader);
// Ref Sequence: "CATCACACTCACTAAGCACACAGAGAATAAT".getBytes();
// SNPs positions: * * * *
final byte[] altReadBases = "CATCACACTCTCTGACCAAACAGAGAATAAT".getBytes();
final int readLength = refReadBases.length;
final byte baseq = 26;
final byte[] quals = new byte[readLength];
Arrays.fill(quals, baseq);
final int mapq = 60;

// create reads
final List<GATKRead> reads = new ArrayList<>(depth);
for (int i = 0; i < depth; i++) {
final byte[] bases = i < altDepth ? altReadBases : refReadBases;
final String cigar = refReadBases.length + "M";
final GATKRead read = ArtificialReadUtils.createArtificialRead(samHeader, "Read" + i, chromosomeIndex,
alignmentStart, bases, quals, cigar);
read.setReadGroup(readGroupName);
read.setMappingQuality(mapq);
read.setIsFirstOfPair();
read.setIsReverseStrand(i % 2 == 0);
reads.add(read);
}

final File samFile = File.createTempFile("synthetic", ".sam");
final SAMFileGATKReadWriter writer = new SAMFileGATKReadWriter(
ReadUtils.createCommonSAMWriter(samFile, null, samHeader, true, false, false));
reads.forEach(writer::addRead);
final List<GATKRead> refReads = M2TestingUtils.createReads(refDepth, M2TestingUtils.DEFAULT_REF_BASES, samHeader, (byte)30);
final List<GATKRead> alt1Reads = M2TestingUtils.createReads(altDepth, altReadBases, samHeader, (byte)30);
refReads.forEach(writer::addRead);
alt1Reads.forEach(writer::addRead);
writer.close(); // closing the writer writes to the file

return samFile;
Expand Down