Skip to content

Commit

Permalink
Improvements and refactoring of Nucleotide.java
Browse files Browse the repository at this point in the history
  • Loading branch information
vruano committed Aug 21, 2018
1 parent 17bd259 commit 6cf0639
Show file tree
Hide file tree
Showing 15 changed files with 637 additions and 117 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,6 @@ public Object onTraversalSuccess() {
/**
 * Handles a single locus: reads the reference base from {@code referenceContext} and hands it,
 * converted to a {@link Nucleotide}, to {@code allelicCountCollector.collectAtLocus} together with
 * the locus' base pileup, its location and {@code minimumBaseQuality}.
 *
 * @param alignmentContext supplies the base pileup and the location passed to the collector
 * @param referenceContext supplies the reference base at this locus
 * @param featureContext   not read by this method
 */
@Override
public void apply(AlignmentContext alignmentContext, ReferenceContext referenceContext, FeatureContext featureContext) {
final byte refAsByte = referenceContext.getBase();
// NOTE(review): the two statements below differ only in Nucleotide.valueOf(...) vs Nucleotide.decode(...).
// They look like the removed/added pair of a diff hunk whose +/- markers were lost; as written here,
// collectAtLocus is invoked twice for the same locus. Confirm that only one of them (presumably the
// decode variant) is present in the actual source file.
allelicCountCollector.collectAtLocus(Nucleotide.valueOf(refAsByte), alignmentContext.getBasePileup(), alignmentContext.getLocation(), minimumBaseQuality);
allelicCountCollector.collectAtLocus(Nucleotide.decode(refAsByte), alignmentContext.getBasePileup(), alignmentContext.getLocation(), minimumBaseQuality);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ private static IntervalList generateBins(final IntervalList preparedIntervalList
private static IntervalList filterBinsContainingOnlyNs(final IntervalList unfilteredBins, final ReferenceDataSource reference) {
final IntervalList bins = new IntervalList(reference.getSequenceDictionary());
for (final Interval unfilteredBin : unfilteredBins) {
if (!Utils.stream(reference.query(new SimpleInterval(unfilteredBin))).allMatch(b -> b == Nucleotide.N.toBase())) {
if (!Utils.stream(reference.query(new SimpleInterval(unfilteredBin))).allMatch(b -> Nucleotide.decode(b) == Nucleotide.N)) {
bins.add(unfilteredBin);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ enum AllelicCountTableColumn {
final int position = dataLine.getInt(AllelicCountTableColumn.POSITION);
final int refReadCount = dataLine.getInt(AllelicCountTableColumn.REF_COUNT);
final int altReadCount = dataLine.getInt(AllelicCountTableColumn.ALT_COUNT);
final Nucleotide refNucleotide = Nucleotide.valueOf(dataLine.get(AllelicCountTableColumn.REF_NUCLEOTIDE.name()).getBytes()[0]);
final Nucleotide altNucleotide = Nucleotide.valueOf(dataLine.get(AllelicCountTableColumn.ALT_NUCLEOTIDE.name()).getBytes()[0]);
final Nucleotide refNucleotide = Nucleotide.decode(dataLine.get(AllelicCountTableColumn.REF_NUCLEOTIDE.name()).charAt(0));
final Nucleotide altNucleotide = Nucleotide.decode(dataLine.get(AllelicCountTableColumn.ALT_NUCLEOTIDE.name()).charAt(0));
final SimpleInterval interval = new SimpleInterval(contig, position, position);
return new AllelicCount(interval, refReadCount, altReadCount, refNucleotide, altNucleotide);
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public class AltSiteRecord {

public AltSiteRecord(final String referenceContext, final int refCount, final int altCount,
final int refF1R2, final int altF1R2, final Nucleotide altAllele){
Utils.validateArg(Nucleotide.REGULAR_BASES.contains(altAllele), "altAllele must be one of {A,C,G,T} but got " + altAllele);
Utils.validateArg(altAllele != null && altAllele.isStandard(), "altAllele must be one of {A,C,G,T} but got " + altAllele);

this.referenceContext = referenceContext;
this.refCount = refCount;
Expand Down Expand Up @@ -63,7 +63,7 @@ public AltSiteRecord(final String referenceContext, final int refCount, final in
public AltSiteRecord getReverseComplementOfRecord(){
Utils.validate(!F1R2FilterConstants.CANONICAL_KMERS.contains(referenceContext), "for consistency, don't make the " +
"revcomp record of a canonical reference context");
final Nucleotide revCompOfAlt = Nucleotide.complement(altAllele.toBase());
final Nucleotide revCompOfAlt = altAllele.complement();
final int newRefF1R2 = refCount - refF1R2;
final int newAltF1R2 = altCount - altF1R2;
return new AltSiteRecord(SequenceUtil.reverseComplement(referenceContext), refCount, altCount, newRefF1R2,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,14 +154,14 @@ public void apply(final AlignmentContext alignmentContext, final ReferenceContex
}

// If we got here, we have an alt site with a single alt base
final Nucleotide altBase = Nucleotide.valueOf(BaseUtils.baseIndexToSimpleBase(altBaseIndex));
final Nucleotide altBase = Nucleotide.decode(BaseUtils.baseIndexToSimpleBase(altBaseIndex));

final int refCount = baseCounts[refBase.ordinal()];
final int altCount = baseCounts[altBaseIndex];
Utils.validate(altCount > 0, "We must have a nonzero alt read but got " + altCount);

final int refF1R2 = pileup.getNumberOfElements(pe -> Nucleotide.valueOf(pe.getBase()) == refBase && ReadUtils.isF1R2(pe.getRead()));
final int altF1R2 = pileup.getNumberOfElements(pe -> Nucleotide.valueOf(pe.getBase()) == altBase && ReadUtils.isF1R2(pe.getRead()));
final int refF1R2 = pileup.getNumberOfElements(pe -> Nucleotide.decode(pe.getBase()) == refBase && ReadUtils.isF1R2(pe.getRead()));
final int altF1R2 = pileup.getNumberOfElements(pe -> Nucleotide.decode(pe.getBase()) == altBase && ReadUtils.isF1R2(pe.getRead()));

if (altCount == 1) {
final ReadOrientation type = altF1R2 == 1 ? F1R2 : F2R1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ public DepthOneHistograms(final int maxDepth) {

// Initialize, for each reference context, the (Alt Allele, Artifact Type) -> Histogram map
F1R2FilterConstants.ALL_KMERS.forEach(context -> {
map.put(context, new HashMap<>((Nucleotide.REGULAR_BASES.size() - 1) * ReadOrientation.SIZE));
map.put(context, new HashMap<>((Nucleotide.STANDARD_DNA_BASES.size() - 1) * ReadOrientation.SIZE));

for (Nucleotide altAllele : Nucleotide.REGULAR_BASES) {
for (Nucleotide altAllele : Nucleotide.STANDARD_DNA_BASES) {
// Skip e.g. AGT -> AGT because G is not an alt allele

if (altAllele == F1R2FilterUtils.getMiddleBase(context)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public class F1R2FilterConstants {
// We combine all sites of depths above this value in the last bin of the histogram
static final int DEFAULT_MAX_DEPTH = 200;

static final int numAltHistogramsPerContext = (Nucleotide.REGULAR_BASES.size() - 1) * (ReadOrientation.values().length);
static final int numAltHistogramsPerContext = (Nucleotide.STANDARD_DNA_BASES.size() - 1) * (ReadOrientation.values().length);

public static Integer[] getEmptyBins(final int maxDepth){
return IntStream.rangeClosed(1, maxDepth).boxed().toArray( Integer[]::new );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ public static List<Histogram<Integer>> combineAltDepthOneHistogramWithRC(final L

final List<Histogram<Integer>> combinedHistograms = new ArrayList<>(F1R2FilterConstants.numAltHistogramsPerContext);

for (Nucleotide altAllele : Nucleotide.REGULAR_BASES){
for (Nucleotide altAllele : Nucleotide.STANDARD_DNA_BASES){
// Skip when the alt base is the ref base, which doesn't make sense because this is a histogram of alt sites
if (altAllele == F1R2FilterUtils.getMiddleBase(refContext)){
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ private void takeEstep(final double[] artifactPriors) {
// Compute the responsibilities of alt sites with depth=1
for (int i = 0; i < maxDepth; i++){
final int depth = i+1;
for (Nucleotide altAllele : Nucleotide.REGULAR_BASES){
for (Nucleotide altAllele : Nucleotide.STANDARD_DNA_BASES){
for (ReadOrientation orientation : ReadOrientation.values()){
if (altAllele == refAllele){
continue;
Expand Down
Loading

0 comments on commit 6cf0639

Please sign in to comment.