-
Notifications
You must be signed in to change notification settings - Fork 597
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
A port of FastaAlternateReferenceMaker and FastaReferenceMaker from G…
…ATK3 (#5549) * Ported FastaAlternateReferenceMaker and FastaReferenceMaker from GATK3 * Added a new walker type ReferenceWalker, and a new tool CountBasesInReference to demonstrate it * Made changes to the close behavior in FastaWriter, and added additional buffering Resolves #4891
- Loading branch information
1 parent
5cdb1d9
commit 64e2d3a
Showing
111 changed files
with
2,212 additions
and
112 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
107 changes: 107 additions & 0 deletions
107
src/main/java/org/broadinstitute/hellbender/engine/ReferenceWalker.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
package org.broadinstitute.hellbender.engine; | ||
|
||
import org.broadinstitute.hellbender.engine.filters.CountingReadFilter; | ||
import org.broadinstitute.hellbender.tools.examples.ExampleReferenceWalker; | ||
import org.broadinstitute.hellbender.utils.SimpleInterval; | ||
import org.broadinstitute.hellbender.utils.iterators.IntervalLocusIterator; | ||
|
||
/** | ||
* A reference walker is a tool which processes each base in a given reference. Each base can be processed individually | ||
* or in a context of multiple bases in a window. Reads and Feature tracks can optionally be included as well. The | ||
* reference bases to process can be subset to a given set of intervals. | ||
* | ||
* ReferenceWalker authors must implement the apply() method to process each position, and may optionally implement | ||
* {@link #onTraversalStart()} and/or {@link #onTraversalSuccess()}. | ||
* | ||
* Tool authors may also implement {@link #getReferenceWindow(SimpleInterval)} to provide additional bases of context | ||
* around each position. | ||
* | ||
* See the {@link ExampleReferenceWalker} walker for an example. | ||
*/ | ||
public abstract class ReferenceWalker extends GATKTool { | ||
|
||
@Override | ||
public String getProgressMeterRecordLabel() { return "bases"; } | ||
|
||
@Override | ||
public final boolean requiresReference() { return true; } | ||
|
||
/** | ||
* Initialize data sources for traversal. | ||
* | ||
* Marked final so that tool authors don't override it. Tool authors should override onTraversalStart() instead. | ||
*/ | ||
@Override | ||
protected final void onStartup() { | ||
super.onStartup(); | ||
} | ||
|
||
/** | ||
* Implementation of reference-locus-based traversal. | ||
* Subclasses can override to provide own behavior but default implementation should be suitable for most uses. | ||
*/ | ||
@Override | ||
public void traverse() { | ||
final CountingReadFilter readFilter = makeReadFilter(); | ||
|
||
for(final SimpleInterval locus : getIntervalIterator()){ | ||
final SimpleInterval referenceWindow = getReferenceWindow(locus); | ||
final ReferenceContext referenceContext = new ReferenceContext(reference, locus, referenceWindow); | ||
apply(referenceContext, | ||
new ReadsContext(reads, referenceContext.getWindow(), readFilter), // Will create an empty ReadsContext if reads == null | ||
new FeatureContext(features, referenceContext.getWindow())); // Will create an empty FeatureContext if features == null | ||
|
||
progressMeter.update(referenceContext.getInterval()); | ||
}; | ||
|
||
} | ||
|
||
/** | ||
* Determine the window to use when creating the ReferenceContext in apply. This determines which reference bases are | ||
* seen at each position, as well as which reads / features are considered to overlap the reference site. | ||
* | ||
* The default implementation returns the single reference locus passed that is passed in, but subclasses may override | ||
* this method in order to change the window size. | ||
* | ||
* @param locus the current locus being processed in the traversal | ||
* @return the window to use when creating the ReferenceContext that is passed into {@link #apply(ReferenceContext, ReadsContext, FeatureContext)} | ||
*/ | ||
protected SimpleInterval getReferenceWindow(SimpleInterval locus){ | ||
return locus; | ||
} | ||
|
||
private Iterable<SimpleInterval> getIntervalIterator(){ | ||
return () -> new IntervalLocusIterator(getTraversalIntervals().iterator()); | ||
} | ||
|
||
/** | ||
* Process an individual reference locus (with optional contextual information). Must be implemented by tool authors. | ||
* In general, tool authors should simply stream their output from apply(), and maintain as little internal state | ||
* as possible. | ||
* | ||
* @param referenceContext Reference bases at the current locus. The window around the single locus is provided by {@link #getReferenceWindow(SimpleInterval)} | ||
* Additional bases my be retrieved by invoking the by invoking {@link ReferenceContext#setWindow} | ||
* on this object before calling {@link ReferenceContext#getBases}, however this will not effect | ||
* the set of reads and features that are considered overlapping which are based on the preset | ||
* window passed in. | ||
* @param readsContext Reads spanning the current reference window. Will be an empty, but non-null, context object | ||
* if there is no backing source of Read data (in which case all queries on it will return an | ||
* empty List). | ||
* @param featureContext Features spanning the current reference window. Will be an empty, but non-null, context object | ||
* if there is no backing source of Feature data (in which case all queries on it will return an | ||
* empty List). | ||
* | ||
*/ | ||
public abstract void apply(ReferenceContext referenceContext, ReadsContext readsContext, FeatureContext featureContext ); | ||
|
||
/** | ||
* Shutdown data sources. | ||
* | ||
* Marked final so that tool authors don't override it. Tool authors should override {@link #onTraversalSuccess()} instead. | ||
*/ | ||
@Override | ||
protected final void onShutdown() { | ||
// Overridden only to make final so that concrete tool implementations don't override | ||
super.onShutdown(); | ||
} | ||
} |
81 changes: 81 additions & 0 deletions
81
src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleReferenceWalker.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package org.broadinstitute.hellbender.tools.examples; | ||
|
||
import com.google.common.annotations.VisibleForTesting; | ||
import htsjdk.variant.variantcontext.VariantContext; | ||
import org.broadinstitute.barclay.argparser.Argument; | ||
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.cmdline.programgroups.ExampleProgramGroup; | ||
import org.broadinstitute.hellbender.engine.*; | ||
import org.broadinstitute.hellbender.utils.SimpleInterval; | ||
|
||
import java.util.Comparator; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.Map.Entry; | ||
|
||
/** | ||
* Counts the number of times each reference context is seen as well as how many times it's overlapped by reads and variants. | ||
* Why you would want to do this is a mystery. | ||
* | ||
* This is a toy example to illustrate how to implement a ReferenceWalker. | ||
*/ | ||
@CommandLineProgramProperties( | ||
summary = "Example of how to implement a ReferenceWalker that uses reads and features as well as a custom window", | ||
oneLineSummary = "Example of how to implement a ReferenceWalker", | ||
programGroup = ExampleProgramGroup.class, | ||
omitFromCommandLine = true) | ||
public class ExampleReferenceWalker extends ReferenceWalker { | ||
|
||
@Argument(fullName = StandardArgumentDefinitions.VARIANT_LONG_NAME, | ||
shortName = StandardArgumentDefinitions.VARIANT_SHORT_NAME, | ||
doc="variants to count overlaps of") | ||
private FeatureInput<VariantContext> variants; | ||
|
||
@VisibleForTesting | ||
final Map<String, OverlapCounts> contextCounts = new HashMap<>(); | ||
|
||
@Override | ||
protected SimpleInterval getReferenceWindow(SimpleInterval locus) { | ||
// add one base of padding to each side of each reference locus. | ||
return new SimpleInterval(locus.getContig(), Math.max(locus.getStart() -1, 1), locus.getStart()+1); | ||
} | ||
|
||
@Override | ||
public void apply(ReferenceContext referenceContext, ReadsContext readsContext, FeatureContext featureContext) { | ||
final byte[] bases = referenceContext.getBases(); | ||
final String baseString = new String(bases); | ||
final OverlapCounts counts = contextCounts.getOrDefault(baseString, new OverlapCounts()); | ||
if(readsContext.iterator().hasNext()){ | ||
counts.overlappedByReads++; | ||
} | ||
|
||
if(!featureContext.getValues(variants).isEmpty()){ | ||
counts.overlappedByVariants++; | ||
} | ||
|
||
counts.timesSeen++; | ||
|
||
contextCounts.put(baseString, counts); | ||
} | ||
|
||
@Override | ||
public Object onTraversalSuccess(){ | ||
contextCounts.entrySet().stream() | ||
.sorted(Comparator.comparing(Entry::getKey)) | ||
.forEachOrdered(entry -> System.out.println(entry.getKey() + " " + entry.getValue().toString())); | ||
return 0; | ||
} | ||
|
||
@VisibleForTesting | ||
static class OverlapCounts { | ||
long timesSeen = 0; | ||
long overlappedByReads = 0; | ||
long overlappedByVariants = 0; | ||
|
||
@Override | ||
public String toString(){ | ||
return String.format("Seen: %d Reads %d Variants %d", timesSeen, overlappedByReads, overlappedByVariants); | ||
} | ||
} | ||
} |
37 changes: 37 additions & 0 deletions
37
src/main/java/org/broadinstitute/hellbender/tools/walkers/fasta/CountBasesInReference.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package org.broadinstitute.hellbender.tools.walkers.fasta; | ||
|
||
import com.google.common.annotations.VisibleForTesting; | ||
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; | ||
import org.broadinstitute.barclay.help.DocumentedFeature; | ||
import org.broadinstitute.hellbender.engine.FeatureContext; | ||
import org.broadinstitute.hellbender.engine.ReadsContext; | ||
import org.broadinstitute.hellbender.engine.ReferenceContext; | ||
import org.broadinstitute.hellbender.engine.ReferenceWalker; | ||
import picard.cmdline.programgroups.ReferenceProgramGroup; | ||
|
||
@DocumentedFeature | ||
@CommandLineProgramProperties( | ||
oneLineSummary = "Count the numbers of each base in a reference file", | ||
summary = "Count the number of times each individual base occurs in a reference file.", | ||
programGroup = ReferenceProgramGroup.class | ||
) | ||
public class CountBasesInReference extends ReferenceWalker { | ||
@VisibleForTesting | ||
final long[] baseCounts = new long[256]; | ||
|
||
@Override | ||
public void apply(ReferenceContext referenceContext, ReadsContext read, FeatureContext featureContext) { | ||
baseCounts[referenceContext.getBase()]++; | ||
} | ||
|
||
@Override | ||
public Object onTraversalSuccess(){ | ||
for (int i = 0; i < baseCounts.length; i++) { | ||
final long count = baseCounts[i]; | ||
if (count > 0){ | ||
System.out.println((char)i + " : " + count ); | ||
} | ||
} | ||
return 0; | ||
} | ||
} |
Oops, something went wrong.