Skip to content

Commit

Permalink
responded to reviewer comments
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesemery committed Jan 11, 2022
1 parent df424c8 commit ce3a00c
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,25 @@

import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.Hidden;

/**
* Set of arguments for configuring the pileup detection code
*/
public final class PileupDetectionArgumentCollection {

public static final String PILEUP_DETECTION_LONG_NAME = "pileup-detection";
public static final String PILEUP_DETECTION_ENABLE_INDELS = "pileup-detection-enable-indel-detection";
public static final String PILEUP_DETECTION_SNP_THRESHOLD = "pileup-detection-enable-snp-alt-threshold";
public static final String PILEUP_DETECTION_ENABLE_INDELS = "pileup-detection-enable-indel-pileup-calling";
public static final String PILEUP_DETECTION_SNP_THRESHOLD = "pileup-detection-snp-alt-threshold";
public static final String PILEUP_DETECTION_ABSOLUTE_ALT_DEPTH = "pileup-detection-absolute-alt-depth";
public static final String PILEUP_DETECTION_INDEL_THRESHOLD = "pileup-detection-enable-indel-alt-threshold";
public static final String PILEUP_DETECTION_INDEL_THRESHOLD = "pileup-detection-indel-alt-threshold";
public static final String PILEUP_DETECTION_FILTER_COVERAGE_LONG_NAME = "pileup-detection-filter-coverage-threshold";

//TODO we currently don't see the same threshold from active region determination...
public static final String PILEUP_DETECTION_ACETIVE_REGION_LOD_THRESHOLD_LONG_NAME = "pileup-detection-active-region-lod-threshold";


// Arguments related to DRAGEN heuristics related to "read badness" intended to filter out false positives from the pileup deteciotn code
// Arguments related to DRAGEN heuristics related to "read badness" intended to filter out false positives from the pileup detection code
public static final String PILEUP_DETECTION_BAD_READ_RATIO_LONG_NAME = "pileup-detection-bad-read-tolerance";
public static final String PILEUP_DETECTION_PROPER_PAIR_READ_BADNESS_LONG_NAME = "pileup-detection-proper-pair-read-badness";
public static final String PILEUP_DETECTION_EDIT_DISTANCE_BADNESS_LONG_NAME = "pileup-detection-edit-distance-read-badness-threshold";
Expand All @@ -36,33 +37,34 @@ public final class PileupDetectionArgumentCollection {
*
* NOTE: --pileup-detection is a beta feature. Use this mode at your own risk.
*/
@Advanced
@Argument(fullName= PILEUP_DETECTION_LONG_NAME, doc = "If enabled, the variant caller will create pileup-based haplotypes in addition to the assembly-based haplotype generation.", optional = true)
public boolean usePileupDetection = false;

/**
* Enables detection of indels from the pileups in. (EXPERIMENTAL FEATURE)
*/
@Advanced
@Argument(fullName= PILEUP_DETECTION_ENABLE_INDELS, doc = "If enabled, pileup detection code will attempt to detect indels missing from assembly. (Requires `--pileup-detection` argument)", optional = true)
@Hidden
@Argument(fullName= PILEUP_DETECTION_ENABLE_INDELS, doc = "Pileup Detection: If enabled, pileup detection code will attempt to detect indels missing from assembly. (Requires `--pileup-detection` argument)", optional = true)
public boolean detectIndels = false;


/**
* Percentage of reads required to supprot the alt for a variant to be considered
* Percentage of reads required to support the alt for a variant to be considered
*/
@Advanced
@Argument(fullName= PILEUP_DETECTION_SNP_THRESHOLD, doc = "Percentage of alt supporting reads in order to consider alt SNP", optional = true)
@Hidden
@Argument(fullName= PILEUP_DETECTION_SNP_THRESHOLD, doc = "Pileup Detection: Percentage of alt supporting reads in order to consider alt SNP", optional = true)
public double snpThreshold = 0.1;
@Advanced
@Argument(fullName= PILEUP_DETECTION_INDEL_THRESHOLD, doc = "Percentage of alt supporting reads in order to consider alt indel", optional = true)
@Hidden
@Argument(fullName= PILEUP_DETECTION_INDEL_THRESHOLD, doc = "Pileup Detection: Percentage of alt supporting reads in order to consider alt indel", optional = true)
public double indelThreshold = 0.5;


@Advanced
@Argument(fullName= PILEUP_DETECTION_ABSOLUTE_ALT_DEPTH, doc = "Absolute number of alt reads necessary to be included in pileup events", optional = true)
@Hidden
@Argument(fullName= PILEUP_DETECTION_ABSOLUTE_ALT_DEPTH, doc = "Pileup Detection: Absolute number of alt reads necessary to be included in pileup events", optional = true)
public double pileupAbsoluteDepth = 0;
@Advanced
@Argument(fullName= PILEUP_DETECTION_INDEL_SNP_BLOCKING_RANGE, doc = "Filters out pileup snps within this many bases of an assembled indel", optional = true)
@Hidden
@Argument(fullName= PILEUP_DETECTION_INDEL_SNP_BLOCKING_RANGE, doc = "Pileup Detection: Filters out pileup snps within this many bases of an assembled indel", optional = true)
public int snpAdajacentToAssemblyIndel = 5;


Expand All @@ -71,22 +73,22 @@ public final class PileupDetectionArgumentCollection {
/**
* Arguments related to the "bad read filtering" where alleles that are supported primarily by reads that fail at least one of a number of heuristics will be filtered out
*/
@Advanced
@Argument(fullName= PILEUP_DETECTION_BAD_READ_RATIO_LONG_NAME, doc = "Thresold of Alt reads rejected by bad reads heuristics to allow the variant", optional = true)
@Hidden
@Argument(fullName= PILEUP_DETECTION_BAD_READ_RATIO_LONG_NAME, doc = "Pileup Detection: Threshold of Alt reads rejected by bad reads heuristics to allow the variant", optional = true)
public double badReadThreshold = 0.0;
@Advanced
@Argument(fullName= PILEUP_DETECTION_PROPER_PAIR_READ_BADNESS_LONG_NAME, doc = "Reject Alt reads not in proper-pairs", optional = true)
@Hidden
@Argument(fullName= PILEUP_DETECTION_PROPER_PAIR_READ_BADNESS_LONG_NAME, doc = "Pileup Detection: Reject alt reads not in proper-pairs", optional = true)
public boolean badReadProperPair = true;
@Advanced
@Argument(fullName= PILEUP_DETECTION_EDIT_DISTANCE_BADNESS_LONG_NAME, doc = "Reject alt reads with greater than this percent edit distance from the reference", optional = true)
@Hidden
@Argument(fullName= PILEUP_DETECTION_EDIT_DISTANCE_BADNESS_LONG_NAME, doc = "Pileup Detection: Reject alt reads with greater than this fraction edit distance from the reference", optional = true)
public double badReadEditDistance = 0.08;
@Advanced
@Argument(fullName= PILEUP_DETECTION_CHIMERIC_READ_BADNESS_LONG_NAME, doc = "Reject reads that are chimeric or supplementary", optional = true)
public boolean badReadSecondary = true;
@Advanced
@Argument(fullName= PILEUP_DETECTION_TLEN_MEAN_LONG_NAME, doc = "Mean template length to consider for read badness. Requires '--"+PILEUP_DETECTION_TLEN_STD_LONG_NAME+"' to also be set.", optional = true)
@Hidden
@Argument(fullName= PILEUP_DETECTION_CHIMERIC_READ_BADNESS_LONG_NAME, doc = "Pileup Detection: Reject reads that are chimeric or supplementary", optional = true)
public boolean badReadSecondaryOrSupplementary = true;
@Hidden
@Argument(fullName= PILEUP_DETECTION_TLEN_MEAN_LONG_NAME, doc = "Pileup Detection: Mean template length (T LEN) to consider for read badness. Requires '--"+PILEUP_DETECTION_TLEN_STD_LONG_NAME+"' to also be set.", optional = true)
public double templateLengthMean = 0.0;
@Advanced
@Argument(fullName= PILEUP_DETECTION_TLEN_STD_LONG_NAME, doc = "Standard deviation template to consider for read badness. Requires '--"+PILEUP_DETECTION_TLEN_MEAN_LONG_NAME+"' to also be set.", optional = true)
public double templateLenghtStd = 0.0;
@Hidden
@Argument(fullName= PILEUP_DETECTION_TLEN_STD_LONG_NAME, doc = "Pileup Detection: Standard deviation template length (T LEN) to consider for read badness. Requires '--"+PILEUP_DETECTION_TLEN_MEAN_LONG_NAME+"' to also be set.", optional = true)
public double templateLengthStd = 0.0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ public final class PileupBasedAlleles {
* that are visible in the pileups but might be dropped from assembly for any number of reasons. The basic algorithm works
* as follows:
* - iterate over every pileup and count alt bases
* - (unfinished) detect insertions overlapping this site (CURRENTLY ONLY WORKS FOR INSERTIONS)
* - count "bad" reads as defined by Illumina filtering for puleup detection of variants {@Link #evaluateBadRead}
* - (beta) detect insertions overlapping this site (CURRENTLY ONLY WORKS FOR INSERTIONS)
* - count "bad" reads as defined by Illumina filtering for pileup detection of variants {@Link #evaluateBadRead}
* - For each detected alt, evaluate if the number of alternate bases are sufficient to make the call and make a VariantContext.
*
* @param alignmentAndReferenceContextList List of stored pileups and reference context information where every element is a base from the active region.
Expand All @@ -40,7 +40,7 @@ public final class PileupBasedAlleles {
*/
public static ArrayList<VariantContext> getPileupVariantContexts(final List<AlignmentAndReferenceContext> alignmentAndReferenceContextList, final PileupDetectionArgumentCollection args, final SAMFileHeader headerForReads) {

final ArrayList<VariantContext> pileupSNPsList = new ArrayList<>();
final ArrayList<VariantContext> pileupVariantList = new ArrayList<>();

// Iterate over every base
for(AlignmentAndReferenceContext alignmentAndReferenceContext : alignmentAndReferenceContextList) {
Expand All @@ -60,8 +60,7 @@ public static ArrayList<VariantContext> getPileupVariantContexts(final List<Alig
for (PileupElement element : pileup) {
final byte eachBase = element.getBase();

// check to see that the base is not ref and that the alleles are one of these bases - ATCGN
// TODO: AH & BG add a better check for acceptable alleles for eachBase
// check to see that the base is not ref (and non-deletion) and increment the alt counts (and evaluate if the read is "bad")
if (refBase != eachBase && eachBase != 'D') {
incrementAltCount(eachBase, altCounts);
totalAltReads++;
Expand Down Expand Up @@ -97,7 +96,7 @@ && passesFilters(args, false, numOfBases, totalAltBadReads, totalAltReads, maxAl

alleles.add(Allele.create(maxAlt.get().getKey()));
final VariantContextBuilder pileupSNP = new VariantContextBuilder("pileup", alignmentContext.getContig(), alignmentContext.getStart(), alignmentContext.getEnd(), alleles);
pileupSNPsList.add(pileupSNP.make());
pileupVariantList.add(pileupSNP.make());
}

// Evaluate the detected INDEL alleles for this site
Expand All @@ -110,17 +109,17 @@ && passesFilters(args, true, numOfBases, totalAltBadReads, totalAltReads, maxIns

indelAlleles.add(Allele.create((char)referenceContext.getBase() + maxIns.get().getKey()));
final VariantContextBuilder pileupInsertion = new VariantContextBuilder("pileup", alignmentContext.getContig(), alignmentContext.getStart(), alignmentContext.getEnd(), indelAlleles);
pileupSNPsList.add(pileupInsertion.make());
pileupVariantList.add(pileupInsertion.make());
}
}
}

return pileupSNPsList;
return pileupVariantList;
}

/**
* Apply the filters to discovered alleles
* - Does it have greater than snpThreshold percentage of bases support in the pileups?
* - Does it have greater than snpThreshold fraction of bases support in the pileups?
* - Does it have greater than pileupAbsoluteDepth number of reads supporting it?
* - Are the reads supporting alts at the site greater than badReadThreshold percent "good"? //TODO evaluate if this is worth doing on a per-allele basis or otherwise
*/
Expand All @@ -141,20 +140,20 @@ private static boolean passesFilters(final PileupDetectionArgumentCollection arg
* @param read
* @param referenceContext
* @param args
* @param headerForRead TODO get rid of this sam record coversion
* @return
* @param headerForRead TODO get rid of this sam record conversion
* @return true if any of the "badness" heuristics suggest we should consider the read suspect, false otherwise.
*/
@VisibleForTesting
static boolean evaluateBadRead(final GATKRead read, final ReferenceContext referenceContext, final PileupDetectionArgumentCollection args, final SAMFileHeader headerForRead) {
if (args.badReadProperPair && !read.isProperlyPaired()) {
return true;
}
if (args.badReadSecondary && (read.isSecondaryAlignment() || read.hasAttribute("SA"))) {
if (args.badReadSecondaryOrSupplementary && (read.isSecondaryAlignment() || read.hasAttribute("SA"))) {
return true;
}


//TODO this conversion is really unnecessary...
//TODO this conversion is really unnecessary. Perhaps we should expose a new SequenceUtil like NM tag calculation?...
SAMRecord samRecordForRead = read.convertToSAMRecord(headerForRead);

// Assert that the edit distance for the read is in line
Expand All @@ -169,11 +168,11 @@ static boolean evaluateBadRead(final GATKRead read, final ReferenceContext refer
}

//TODO add threshold descibed by illumina about insert size compared to the average
if (args.templateLenghtStd > 0 && args.templateLengthMean > 0) {
if (args.templateLengthStd > 0 && args.templateLengthMean > 0) {
int templateLength = samRecordForRead.getInferredInsertSize();
// This is an illumina magic number... possibly decide to
if (templateLength < args.templateLengthMean - 2.25*args.templateLenghtStd
|| templateLength > args.templateLengthMean + 2.25*args.templateLenghtStd) {
// This is an illumina magic number... Its possible none of this is particularly important for Functional Equivalency.
if (templateLength < args.templateLengthMean - 2.25 * args.templateLengthStd
|| templateLength > args.templateLengthMean + 2.25 * args.templateLengthStd) {
return true;
}
}
Expand Down

0 comments on commit ce3a00c

Please sign in to comment.