Skip to content

Commit

Permalink
added some relatively untested arguments to comply with DRAGEN-Column…
Browse files Browse the repository at this point in the history
…wiseDetection code
  • Loading branch information
jamesemery committed Jan 11, 2022
1 parent 1a81c76 commit df424c8
Show file tree
Hide file tree
Showing 15 changed files with 738 additions and 233 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.util.Locatable;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.mutect.AlignmentAndReferenceContext;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.gatk.nativebindings.smithwaterman.SWParameters;
import org.broadinstitute.hellbender.engine.FeatureInput;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.hellbender.utils.haplotype.HaplotypeBAMWriter;
import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner;
import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAlignmentConstants;

/**
* Set of arguments for Assembly Based Callers
Expand All @@ -36,26 +34,6 @@ public abstract class AssemblyBasedCallerArgumentCollection {

public static final String PILEUP_DETECTION_LONG_NAME = "pileup-detection";

public static final String SMITH_WATERMAN_DANGLING_END_MATCH_VALUE_LONG_NAME = "smith-waterman-dangling-end-match-value";
public static final String SMITH_WATERMAN_DANGLING_END_MISMATCH_PENALTY_LONG_NAME = "smith-waterman-dangling-end-mismatch-penalty";
public static final String SMITH_WATERMAN_DANGLING_END_GAP_OPEN_PENALTY_LONG_NAME = "smith-waterman-dangling-end-gap-open-penalty";
public static final String SMITH_WATERMAN_DANGLING_END_GAP_EXTEND_PENALTY_LONG_NAME = "smith-waterman-dangling-end-gap-extend-penalty";
public static final String SMITH_WATERMAN_HAPLOTYPE_TO_REFERENCE_MATCH_VALUE_LONG_NAME = "smith-waterman-haplotype-to-reference-match-value";
public static final String SMITH_WATERMAN_HAPLOTYPE_TO_REFERENCE_MISMATCH_PENALTY_LONG_NAME = "smith-waterman-haplotype-to-reference-mismatch-penalty";
public static final String SMITH_WATERMAN_HAPLOTYPE_TO_REFERENCE_GAP_OPEN_PENALTY_LONG_NAME = "smith-waterman-haplotype-to-reference-gap-open-penalty";
public static final String SMITH_WATERMAN_HAPLOTYPE_TO_REFERENCE_GAP_EXTEND_PENALTY_LONG_NAME = "smith-waterman-haplotype-to-reference-gap-extend-penalty";
public static final String SMITH_WATERMAN_READ_TO_HAPLOTYPE_MATCH_VALUE_LONG_NAME = "smith-waterman-read-to-haplotype-match-value";
public static final String SMITH_WATERMAN_READ_TO_HAPLOTYPE_MISMATCH_PENALTY_LONG_NAME = "smith-waterman-read-to-haplotype-mismatch-penalty";
public static final String SMITH_WATERMAN_READ_TO_HAPLOTYPE_GAP_OPEN_PENALTY_LONG_NAME = "smith-waterman-read-to-haplotype-gap-open-penalty";
public static final String SMITH_WATERMAN_READ_TO_HAPLOTYPE_GAP_EXTEND_PENALTY_LONG_NAME = "smith-waterman-read-to-haplotype-gap-extend-penalty";

/** See documentation at {@link SmithWatermanAlignmentConstants#STANDARD_NGS}. */
private static final SWParameters DEFAULT_DANGLING_END_SMITH_WATERMAN_PARAMETERS = SmithWatermanAlignmentConstants.STANDARD_NGS;
/** See documentation at {@link SmithWatermanAlignmentConstants#NEW_SW_PARAMETERS}. */
private static final SWParameters DEFAULT_HAPLOTYPE_TO_REFERENCE_SMITH_WATERMAN_PARAMETERS = SmithWatermanAlignmentConstants.NEW_SW_PARAMETERS;
/** See documentation at {@link SmithWatermanAlignmentConstants#ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS}. */
private static final SWParameters DEFAULT_READ_TO_HAPLOTYPE_SMITH_WATERMAN_PARAMETERS = SmithWatermanAlignmentConstants.ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS;

public ReadThreadingAssembler createReadThreadingAssembler() {
final ReadThreadingAssembler assemblyEngine = assemblerArgs.makeReadThreadingAssembler();
assemblyEngine.setDebug(assemblerArgs.debugAssembly);
Expand All @@ -72,6 +50,9 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
@ArgumentCollection
public LikelihoodEngineArgumentCollection likelihoodArgs = new LikelihoodEngineArgumentCollection();

@ArgumentCollection
public PileupDetectionArgumentCollection pileupDetectionArgs = new PileupDetectionArgumentCollection();

/**
* The assembled haplotypes and locally realigned reads will be written as BAM to this file if requested. Really
* for debugging purposes only. Note that the output here does not include uninformative reads so that not every
Expand Down Expand Up @@ -172,133 +153,4 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
"that overlap the variant or any base no further than this distance expressed in base pairs",
optional = true)
public int informativeReadOverlapMargin = 2;

/**
* Enables pileup-based haplotype creation and variant detection
*
* NOTE: --pileup-detection is a beta feature. Use this mode at your own risk.
*/
@Argument(fullName= PILEUP_DETECTION_LONG_NAME, doc = "If enabled, the variant caller will create pileup-based haplotypes in addition to the assembly-based haplotype generation.", optional = true)
public boolean usePileupDetection = false;


// -----------------------------------------------------------------------------------------------
// Smith-Waterman parameters for dangling-end recovery
// -----------------------------------------------------------------------------------------------

@Advanced
@Argument(fullName = SMITH_WATERMAN_DANGLING_END_MATCH_VALUE_LONG_NAME,
doc = "Smith-Waterman match value for dangling-end recovery.",
minValue = 0,
optional = true)
public int smithWatermanDanglingEndMatchValue = DEFAULT_DANGLING_END_SMITH_WATERMAN_PARAMETERS.getMatchValue();

@Advanced
@Argument(fullName = SMITH_WATERMAN_DANGLING_END_MISMATCH_PENALTY_LONG_NAME,
doc = "Smith-Waterman mismatch penalty for dangling-end recovery.",
maxValue = 0,
optional = true)
public int smithWatermanDanglingEndMismatchPenalty = DEFAULT_DANGLING_END_SMITH_WATERMAN_PARAMETERS.getMismatchPenalty();

@Advanced
@Argument(fullName = SMITH_WATERMAN_DANGLING_END_GAP_OPEN_PENALTY_LONG_NAME,
doc = "Smith-Waterman gap-open penalty for dangling-end recovery.",
maxValue = 0,
optional = true)
public int smithWatermanDanglingEndGapOpenPenalty = DEFAULT_DANGLING_END_SMITH_WATERMAN_PARAMETERS.getGapOpenPenalty();

@Advanced
@Argument(fullName = SMITH_WATERMAN_DANGLING_END_GAP_EXTEND_PENALTY_LONG_NAME,
doc = "Smith-Waterman gap-extend penalty for dangling-end recovery.",
maxValue = 0,
optional = true)
public int smithWatermanDanglingEndGapExtendPenalty = DEFAULT_DANGLING_END_SMITH_WATERMAN_PARAMETERS.getGapExtendPenalty();

// -----------------------------------------------------------------------------------------------
// Smith-Waterman parameters for haplotype-to-reference alignment
// -----------------------------------------------------------------------------------------------

@Advanced
@Argument(fullName = SMITH_WATERMAN_HAPLOTYPE_TO_REFERENCE_MATCH_VALUE_LONG_NAME,
doc = "Smith-Waterman match value for haplotype-to-reference alignment.",
minValue = 0,
optional = true)
public int smithWatermanHaplotypeToReferenceMatchValue = DEFAULT_HAPLOTYPE_TO_REFERENCE_SMITH_WATERMAN_PARAMETERS.getMatchValue();

@Advanced
@Argument(fullName = SMITH_WATERMAN_HAPLOTYPE_TO_REFERENCE_MISMATCH_PENALTY_LONG_NAME,
doc = "Smith-Waterman mismatch penalty for haplotype-to-reference alignment.",
maxValue = 0,
optional = true)
public int smithWatermanHaplotypeToReferenceMismatchPenalty = DEFAULT_HAPLOTYPE_TO_REFERENCE_SMITH_WATERMAN_PARAMETERS.getMismatchPenalty();

@Advanced
@Argument(fullName = SMITH_WATERMAN_HAPLOTYPE_TO_REFERENCE_GAP_OPEN_PENALTY_LONG_NAME,
doc = "Smith-Waterman gap-open penalty for haplotype-to-reference alignment.",
maxValue = 0,
optional = true)
public int smithWatermanHaplotypeToReferenceGapOpenPenalty = DEFAULT_HAPLOTYPE_TO_REFERENCE_SMITH_WATERMAN_PARAMETERS.getGapOpenPenalty();

@Advanced
@Argument(fullName = SMITH_WATERMAN_HAPLOTYPE_TO_REFERENCE_GAP_EXTEND_PENALTY_LONG_NAME,
doc = "Smith-Waterman gap-extend penalty for haplotype-to-reference alignment.",
maxValue = 0,
optional = true)
public int smithWatermanHaplotypeToReferenceGapExtendPenalty = DEFAULT_HAPLOTYPE_TO_REFERENCE_SMITH_WATERMAN_PARAMETERS.getGapExtendPenalty();

// -----------------------------------------------------------------------------------------------
// Smith-Waterman parameters for read-to-haplotype alignment
// -----------------------------------------------------------------------------------------------

@Advanced
@Argument(fullName = SMITH_WATERMAN_READ_TO_HAPLOTYPE_MATCH_VALUE_LONG_NAME,
doc = "Smith-Waterman match value for read-to-haplotype alignment.",
minValue = 0,
optional = true)
public int smithWatermanReadToHaplotypeMatchValue = DEFAULT_READ_TO_HAPLOTYPE_SMITH_WATERMAN_PARAMETERS.getMatchValue();

@Advanced
@Argument(fullName = SMITH_WATERMAN_READ_TO_HAPLOTYPE_MISMATCH_PENALTY_LONG_NAME,
doc = "Smith-Waterman mismatch penalty for read-to-haplotype alignment.",
maxValue = 0,
optional = true)
public int smithWatermanReadToHaplotypeMismatchPenalty = DEFAULT_READ_TO_HAPLOTYPE_SMITH_WATERMAN_PARAMETERS.getMismatchPenalty();

@Advanced
@Argument(fullName = SMITH_WATERMAN_READ_TO_HAPLOTYPE_GAP_OPEN_PENALTY_LONG_NAME,
doc = "Smith-Waterman gap-open penalty for read-to-haplotype alignment.",
maxValue = 0,
optional = true)
public int smithWatermanReadToHaplotypeGapOpenPenalty = DEFAULT_READ_TO_HAPLOTYPE_SMITH_WATERMAN_PARAMETERS.getGapOpenPenalty();

@Advanced
@Argument(fullName = SMITH_WATERMAN_READ_TO_HAPLOTYPE_GAP_EXTEND_PENALTY_LONG_NAME,
doc = "Smith-Waterman gap-extend penalty for read-to-haplotype alignment.",
maxValue = 0,
optional = true)
public int smithWatermanReadToHaplotypeGapExtendPenalty = DEFAULT_READ_TO_HAPLOTYPE_SMITH_WATERMAN_PARAMETERS.getGapExtendPenalty();

public SWParameters getDanglingEndSWParameters() {
return new SWParameters(
smithWatermanDanglingEndMatchValue,
smithWatermanDanglingEndMismatchPenalty,
smithWatermanDanglingEndGapOpenPenalty,
smithWatermanDanglingEndGapExtendPenalty);
}

public SWParameters getHaplotypeToReferenceSWParameters() {
return new SWParameters(
smithWatermanHaplotypeToReferenceMatchValue,
smithWatermanHaplotypeToReferenceMismatchPenalty,
smithWatermanHaplotypeToReferenceGapOpenPenalty,
smithWatermanHaplotypeToReferenceGapExtendPenalty);
}

public SWParameters getReadToHaplotypeSWParameters() {
return new SWParameters(
smithWatermanReadToHaplotypeMatchValue,
smithWatermanReadToHaplotypeMismatchPenalty,
smithWatermanReadToHaplotypeGapOpenPenalty,
smithWatermanReadToHaplotypeGapExtendPenalty);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.broadinstitute.hellbender.engine.AssemblyRegion;
import org.broadinstitute.hellbender.tools.walkers.ReferenceConfidenceVariantContextMerger;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
Expand Down Expand Up @@ -320,7 +321,7 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region,
}

if (!forcedPileupAlleles.isEmpty()) {
processPileupAlleles(region, forcedPileupAlleles, argumentCollection.maxMnpDistance, aligner, refHaplotype, assemblyResultSet);
processPileupAlleles(region, forcedPileupAlleles, argumentCollection.pileupDetectionArgs.snpAdajacentToAssemblyIndel, argumentCollection.maxMnpDistance, aligner, refHaplotype, assemblyResultSet);
}


Expand All @@ -340,12 +341,26 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region,
}
}

/**
* Handle pileup detected alternate alleles.
* @param region
* @param givenAlleles
* @param maxMnpDistance
* @param snpAdjacentToIndelLimit
* @param aligner
* @param refHaplotype
* @param assemblyResultSet
*/
@VisibleForTesting
static void processPileupAlleles(AssemblyRegion region, List<VariantContext> givenAlleles, int maxMnpDistance, SmithWatermanAligner aligner, Haplotype refHaplotype, AssemblyResultSet assemblyResultSet) {
static void processPileupAlleles(final AssemblyRegion region, final List<VariantContext> givenAlleles, final int maxMnpDistance,
final int snpAdjacentToIndelLimit, final SmithWatermanAligner aligner, final Haplotype refHaplotype,
final AssemblyResultSet assemblyResultSet) {
final int assemblyRegionStart = region.getPaddedSpan().getStart();
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
final Map<Integer, VariantContext> assembledVariants = assemblyResultSet.getVariationEvents(maxMnpDistance).stream()
.collect(Collectors.groupingBy(VariantContext::getStart, Collectors.collectingAndThen(Collectors.toList(), AssemblyBasedCallerUtils::makeMergedVariantContext)));
final Collection<VariantContext> assembledIndels = assemblyResultSet.getVariationEvents(maxMnpDistance).stream().filter(VariantContext::isIndel)
.collect(Collectors.groupingBy(VariantContext::getStart, Collectors.collectingAndThen(Collectors.toList(), AssemblyBasedCallerUtils::makeMergedVariantContext))).values();

Set<Haplotype> baseHaplotypes = new TreeSet<>();
//BG Testing assembledAndNewHaplotypes.addAll(assemblyResultSet.getHaplotypeList());
Expand All @@ -356,9 +371,11 @@ static void processPileupAlleles(AssemblyRegion region, List<VariantContext> giv
.collect(Collectors.toList()));

Map<Kmer, Integer> kmerReadCounts = getKmerReadCounts(region.getHardClippedPileupReads(), 10);
//TODO BG & AH filter out low base and/or mapping quality alleles

for (final VariantContext givenVC : givenAlleles) {
// Remove SNPs that are too close to assembled indels.
final List<VariantContext> givenAllelesFiltered = givenAlleles.stream().filter(vc -> vc.isIndel() || assembledIndels.stream().noneMatch(indel -> vc.withinDistanceOf(indel, snpAdjacentToIndelLimit))).collect(Collectors.toList());

for (final VariantContext givenVC : givenAllelesFiltered) {
//TODO BG Question - What does the assembledVC do?
final VariantContext assembledVC = assembledVariants.get(givenVC.getStart());
final int givenVCRefLength = givenVC.getReference().length();
Expand Down Expand Up @@ -411,6 +428,7 @@ static void processPileupAlleles(AssemblyRegion region, List<VariantContext> giv
assemblyResultSet.regenerateVariationEvents(maxMnpDistance);
}

@VisibleForTesting
static void addGivenAlleles(final int assemblyRegionStart, final List<VariantContext> givenAlleles, final int maxMnpDistance,
final SmithWatermanAligner aligner, final SWParameters haplotypeToReferenceSWParameters, final Haplotype refHaplotype, final AssemblyResultSet assemblyResultSet) {
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
Expand Down Expand Up @@ -476,7 +494,7 @@ static Map<Kmer, Integer> getKmerReadCounts(final List<GATKRead> hardClippedPil

@VisibleForTesting
static Set<Haplotype> filterPileupHaplotypes(final List<Haplotype> onlyNewHaplotypes,
final Map<Kmer, Integer> kmerReadCounts,
final Map<Kmer, Integer> kmerReadCounts,
final int numPileupHaplotypes) {
// TODO AH & BG add the following code to check for quality of the SNPs
// // make sure we're supposed to look for high entropy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -584,8 +584,8 @@ public List<VariantContext> callRegion(final AssemblyRegion region, final Featur
}

List<VariantContext> forcedPileupAlleles = Collections.emptyList();
if(hcArgs.usePileupDetection){
forcedPileupAlleles = PileupBasedAlleles.getPileupVariantContexts(region.getAlignmentData());
if(hcArgs.pileupDetectionArgs.usePileupDetection){
forcedPileupAlleles = PileupBasedAlleles.getPileupVariantContexts(region.getAlignmentData(), hcArgs.pileupDetectionArgs, readsHeader);
}

// run the local assembler, getting back a collection of information on how we should proceed
Expand Down
Loading

0 comments on commit df424c8

Please sign in to comment.