diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCounts.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCounts.java index 6b63c8f70cf..56339b68296 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCounts.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCounts.java @@ -149,6 +149,6 @@ public Object onTraversalSuccess() { @Override public void apply(AlignmentContext alignmentContext, ReferenceContext referenceContext, FeatureContext featureContext) { final byte refAsByte = referenceContext.getBase(); - allelicCountCollector.collectAtLocus(Nucleotide.valueOf(refAsByte), alignmentContext.getBasePileup(), alignmentContext.getLocation(), minimumBaseQuality); + allelicCountCollector.collectAtLocus(Nucleotide.decode(refAsByte), alignmentContext.getBasePileup(), alignmentContext.getLocation(), minimumBaseQuality); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PreprocessIntervals.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PreprocessIntervals.java index 3af7c4d93d6..dc33c63ad09 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PreprocessIntervals.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PreprocessIntervals.java @@ -202,7 +202,7 @@ private static IntervalList generateBins(final IntervalList preparedIntervalList private static IntervalList filterBinsContainingOnlyNs(final IntervalList unfilteredBins, final ReferenceDataSource reference) { final IntervalList bins = new IntervalList(reference.getSequenceDictionary()); for (final Interval unfilteredBin : unfilteredBins) { - if (!Utils.stream(reference.query(new SimpleInterval(unfilteredBin))).allMatch(b -> b == Nucleotide.N.toBase())) { + if (!Utils.stream(reference.query(new SimpleInterval(unfilteredBin))).allMatch(b -> Nucleotide.decode(b) == Nucleotide.N)) { 
bins.add(unfilteredBin); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollection.java index 6367420b20d..e1ae69e1649 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollection.java @@ -40,8 +40,8 @@ enum AllelicCountTableColumn { final int position = dataLine.getInt(AllelicCountTableColumn.POSITION); final int refReadCount = dataLine.getInt(AllelicCountTableColumn.REF_COUNT); final int altReadCount = dataLine.getInt(AllelicCountTableColumn.ALT_COUNT); - final Nucleotide refNucleotide = Nucleotide.valueOf(dataLine.get(AllelicCountTableColumn.REF_NUCLEOTIDE.name()).getBytes()[0]); - final Nucleotide altNucleotide = Nucleotide.valueOf(dataLine.get(AllelicCountTableColumn.ALT_NUCLEOTIDE.name()).getBytes()[0]); + final Nucleotide refNucleotide = Nucleotide.decode(dataLine.get(AllelicCountTableColumn.REF_NUCLEOTIDE.name()).charAt(0)); + final Nucleotide altNucleotide = Nucleotide.decode(dataLine.get(AllelicCountTableColumn.ALT_NUCLEOTIDE.name()).charAt(0)); final SimpleInterval interval = new SimpleInterval(contig, position, position); return new AllelicCount(interval, refReadCount, altReadCount, refNucleotide, altNucleotide); }; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/AltSiteRecord.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/AltSiteRecord.java index 420a7ae12e1..842a5c81134 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/AltSiteRecord.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/AltSiteRecord.java @@ -29,7 +29,7 @@ public class AltSiteRecord { public 
AltSiteRecord(final String referenceContext, final int refCount, final int altCount, final int refF1R2, final int altF1R2, final Nucleotide altAllele){ - Utils.validateArg(Nucleotide.REGULAR_BASES.contains(altAllele), "altAllele must be one of {A,C,G,T} but got " + altAllele); + Utils.validateArg(altAllele != null && altAllele.isStandard(), "altAllele must be one of {A,C,G,T} but got " + altAllele); this.referenceContext = referenceContext; this.refCount = refCount; @@ -63,7 +63,7 @@ public AltSiteRecord(final String referenceContext, final int refCount, final in public AltSiteRecord getReverseComplementOfRecord(){ Utils.validate(!F1R2FilterConstants.CANONICAL_KMERS.contains(referenceContext), "for consistency, don't make the " + "revcomp record of a canonical reference context"); - final Nucleotide revCompOfAlt = Nucleotide.complement(altAllele.toBase()); + final Nucleotide revCompOfAlt = altAllele.complement(); final int newRefF1R2 = refCount - refF1R2; final int newAltF1R2 = altCount - altF1R2; return new AltSiteRecord(SequenceUtil.reverseComplement(referenceContext), refCount, altCount, newRefF1R2, diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/CollectF1R2Counts.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/CollectF1R2Counts.java index 7f24c1032f5..2b3514b6f43 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/CollectF1R2Counts.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/CollectF1R2Counts.java @@ -154,14 +154,14 @@ public void apply(final AlignmentContext alignmentContext, final ReferenceContex } // If we got here, we have an alt site with a single alt base - final Nucleotide altBase = Nucleotide.valueOf(BaseUtils.baseIndexToSimpleBase(altBaseIndex)); + final Nucleotide altBase = Nucleotide.decode(BaseUtils.baseIndexToSimpleBase(altBaseIndex)); final int refCount = baseCounts[refBase.ordinal()]; final int 
altCount = baseCounts[altBaseIndex]; Utils.validate(altCount > 0, "We must have a nonzero alt read but got " + altCount); - final int refF1R2 = pileup.getNumberOfElements(pe -> Nucleotide.valueOf(pe.getBase()) == refBase && ReadUtils.isF1R2(pe.getRead())); - final int altF1R2 = pileup.getNumberOfElements(pe -> Nucleotide.valueOf(pe.getBase()) == altBase && ReadUtils.isF1R2(pe.getRead())); + final int refF1R2 = pileup.getNumberOfElements(pe -> Nucleotide.decode(pe.getBase()) == refBase && ReadUtils.isF1R2(pe.getRead())); + final int altF1R2 = pileup.getNumberOfElements(pe -> Nucleotide.decode(pe.getBase()) == altBase && ReadUtils.isF1R2(pe.getRead())); if (altCount == 1) { final ReadOrientation type = altF1R2 == 1 ? F1R2 : F2R1; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/DepthOneHistograms.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/DepthOneHistograms.java index b7fb44d2fb9..d5a8a96bcc7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/DepthOneHistograms.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/DepthOneHistograms.java @@ -24,9 +24,9 @@ public DepthOneHistograms(final int maxDepth) { // Initialize, for each reference context, the (Alt Allele, Artifact Type) -> Histogram map F1R2FilterConstants.ALL_KMERS.forEach(context -> { - map.put(context, new HashMap<>((Nucleotide.REGULAR_BASES.size() - 1) * ReadOrientation.SIZE)); + map.put(context, new HashMap<>((Nucleotide.STANDARD_DNA_BASES.size() - 1) * ReadOrientation.SIZE)); - for (Nucleotide altAllele : Nucleotide.REGULAR_BASES) { + for (Nucleotide altAllele : Nucleotide.STANDARD_DNA_BASES) { // Skip e.g. 
AGT -> AGT because G is not an alt allele if (altAllele == F1R2FilterUtils.getMiddleBase(context)) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/F1R2FilterConstants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/F1R2FilterConstants.java index b25cb5c7da4..7c964d4129d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/F1R2FilterConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/F1R2FilterConstants.java @@ -40,7 +40,7 @@ public class F1R2FilterConstants { // We combine all sites of depths above this value in the last bin of the histogram static final int DEFAULT_MAX_DEPTH = 200; - static final int numAltHistogramsPerContext = (Nucleotide.REGULAR_BASES.size() - 1) * (ReadOrientation.values().length); + static final int numAltHistogramsPerContext = (Nucleotide.STANDARD_DNA_BASES.size() - 1) * (ReadOrientation.values().length); public static Integer[] getEmptyBins(final int maxDepth){ return IntStream.rangeClosed(1, maxDepth).boxed().toArray( Integer[]::new ); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModel.java index 0f96d902a5c..b371a4fe450 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModel.java @@ -180,7 +180,7 @@ public static List> combineAltDepthOneHistogramWithRC(final L final List> combinedHistograms = new ArrayList<>(F1R2FilterConstants.numAltHistogramsPerContext); - for (Nucleotide altAllele : Nucleotide.REGULAR_BASES){ + for (Nucleotide altAllele : Nucleotide.STANDARD_DNA_BASES){ // Skip when the alt base is the ref base, which doesn't make sense because this is a 
histogram of alt sites if (altAllele == F1R2FilterUtils.getMiddleBase(refContext)){ continue; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelEngine.java index 825f1bae456..7e54dec55ed 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelEngine.java @@ -197,7 +197,7 @@ private void takeEstep(final double[] artifactPriors) { // Compute the responsibilities of alt sites with depth=1 for (int i = 0; i < maxDepth; i++){ final int depth = i+1; - for (Nucleotide altAllele : Nucleotide.REGULAR_BASES){ + for (Nucleotide altAllele : Nucleotide.STANDARD_DNA_BASES){ for (ReadOrientation orientation : ReadOrientation.values()){ if (altAllele == refAllele){ continue; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/Nucleotide.java b/src/main/java/org/broadinstitute/hellbender/utils/Nucleotide.java index 612a657c72a..93c6122d14b 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/Nucleotide.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/Nucleotide.java @@ -1,46 +1,123 @@ package org.broadinstitute.hellbender.utils; +import javax.validation.constraints.NotNull; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.stream.LongStream; /** - * Represents the nucleotide alphabet. + * Represents the nucleotide alphabet with support for IUPAC ambiguity codes. * *

- * This enumeration not only contains concrete nucleotides, but also + * This enumeration not only contains standard (non-ambiguous) nucleotides, but also * values to represent ambiguous and invalid codes. *

* * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> */ public enum Nucleotide { - A, C, G, T, N, X, INVALID; - public static final List REGULAR_BASES = Arrays.asList(A, C, G, T); + // Standard nucleotide codes, + // and their one-bit-encoding masks CODE(0bTGCA): + A(0b0001), + C(0b0010), + G(0b0100), + T(0b1000), - private static final Nucleotide[] baseToValue = new Nucleotide[Byte.MAX_VALUE + 1]; + // Extended codes: + // CODE(included nucs) + R(A, G), // Purines. + Y(C, T), // Pyrimidines. + S(C, G), // Strong nucleotides. + W(A, T), // Weak nucleotides. + K(G, T), // Keto nucleotides. + M(A, C), // Amino nucleotides. + // The following 4 tri-nucleotide codes don't have a proper long name, they are simply all-except-one. + B(C, G, T), // Not-A (B follows A) + D(A, G, T), // Not-C (D follows C) + H(A, C, T), // Not-G (H follows G) + V(A, C, G), // Not-T (V follows T) + // Any + N(A, C, G, T), // Any/Unknown + // and X/invalid-call: + X(); // Invalid. - private static final Nucleotide[] reverseComplement = new Nucleotide[Byte.MAX_VALUE]; + // As far as this enum is concerned, + // references to Uracil (U) are considered equivalent to Thymine (T) as they are transcription equivalent. + public static final Nucleotide U = T; + + // Convenient long form alternative names for some of the enumeration values: + + // Long form standard nucleotide names. 
+ public static final Nucleotide ADENINE = A; + public static final Nucleotide CYTOSINE = C; + public static final Nucleotide GUANINE = G; + public static final Nucleotide THYMINE = T; + public static final Nucleotide URACIL = U; + + // Ambiguous nucleotide groups with proper long form names: + public static final Nucleotide STRONG = S; + public static final Nucleotide WEAK = W; + public static final Nucleotide PURINE = R; + public static final Nucleotide PYRIMIDINE = Y; + public static final Nucleotide AMINO = M; + public static final Nucleotide KETO = K; + public static final Nucleotide ANY = N; + public static final Nucleotide UNKNOWN = N; + public static final Nucleotide INVALID = X; + + /** + * List of the standard (non-redundant) nucleotide values in their preferred alphabetical order. + */ + public static final List STANDARD_DNA_BASES = Collections.unmodifiableList(Arrays.asList(A, C, G, T)); + + // actually calling values() is costly (creates a new array every time) and often we do just to find out the + // total number of constants. 
+ private static final int NUMBER_OF_CONSTANTS; + + private static final Nucleotide[] baseToValue; + private static final Nucleotide[] maskToValue; static { + final Nucleotide[] values = values(); + NUMBER_OF_CONSTANTS = values.length; + baseToValue = new Nucleotide[1 << Byte.SIZE]; + maskToValue = new Nucleotide[1 << 4]; Arrays.fill(baseToValue, INVALID); - baseToValue['a'] = baseToValue['A'] = A; - baseToValue['c'] = baseToValue['C'] = C; - baseToValue['g'] = baseToValue['G'] = G; - baseToValue['t'] = baseToValue['T'] = T; - baseToValue['u'] = baseToValue['U'] = T; - baseToValue['x'] = baseToValue['X'] = X; - baseToValue['n'] = baseToValue['N'] = N; + for (final Nucleotide nucleotide : values) { + baseToValue[nucleotide.lowerCaseByteEncoding & 0xFF] + = baseToValue[nucleotide.upperCaseByteEncoding & 0xFF] = nucleotide; + maskToValue[nucleotide.mask] = nucleotide; + } + baseToValue['u'] = baseToValue['U'] = U; + } + + private final int mask; + private final boolean isStandard; + private Nucleotide complement; + private Nucleotide transition; + private Nucleotide transversion; + + /** + * Holds lower-case byte encoding for this nucleotide; {@code 0} for {@link Nucleotide#INVALID}. + */ + private final byte lowerCaseByteEncoding; + + /** + * Holds the upper-case byte encoding for this nucleotide; {@code 0} for {@link Nucleotide#INVALID}. 
+ */ + private final byte upperCaseByteEncoding; + + Nucleotide(final int mask) { + this.mask = mask; + isStandard = Integer.bitCount(mask & 0b1111) == 1; + lowerCaseByteEncoding = (byte) Character.toLowerCase(name().charAt(0)); + upperCaseByteEncoding = (byte) Character.toUpperCase(name().charAt(0)); + } - Arrays.fill(reverseComplement, INVALID); - reverseComplement['a'] = reverseComplement['A'] = T; - reverseComplement['c'] = reverseComplement['C'] = G; - reverseComplement['g'] = reverseComplement['G'] = C; - reverseComplement['t'] = reverseComplement['T'] = A; - reverseComplement['u'] = reverseComplement['U'] = A; - reverseComplement['x'] = reverseComplement['X'] = X; - reverseComplement['n'] = reverseComplement['N'] = N; + Nucleotide(final Nucleotide ... nucs) { + this(Arrays.stream(nucs).mapToInt(nuc -> nuc.mask).reduce((a, b) -> a | b).orElse(0)); } /** @@ -51,16 +128,18 @@ public enum Nucleotide { *

* The {@link #INVALID} nucleotide does not have an actual base then resulting in an exception. *

- * @throws UnsupportedOperationException if this nucleotide does not have a byte representation such - * as {@link #INVALID}. - * @return a positive byte value. - */ - public byte toBase() { - if (this == INVALID) { - throw new UnsupportedOperationException("the invalid nucleotide does not have a base byte"); - } else { - return (byte) name().charAt(0); - } + * @return a valid byte representation for a nucleotide, {@code 0} for {@link Nucleotide#INVALID}. + */ + public byte encodeAsByte(final boolean upperCase) { + return upperCase ? upperCaseByteEncoding : lowerCaseByteEncoding; + } + + /** + * Returns the nucleotide encoding in a byte using its upper-case representation. + * @return a valid upper-case byte representation for a nucleotide, {@code 0} for {@link Nucleotide#INVALID}. + */ + public byte encodeAsByte() { + return upperCaseByteEncoding; } /** @@ -70,34 +149,199 @@ public byte toBase() { * @return never {@code null}, but {@link #INVALID} if the base code does not * correspond to a valid nucleotide specification. */ - public static Nucleotide valueOf(final byte base) { + public static Nucleotide decode(final byte base) { return baseToValue[Utils.validIndex(base, baseToValue.length)]; } - public static Nucleotide complement(final byte base){ - return reverseComplement[Utils.validIndex(base, reverseComplement.length)]; + public static Nucleotide decode(final char base) { + return decode((byte) base); + } + + /** + * Checks whether the nucleotide refers to a concrete (rather than ambiguous) base. + * @return {@code true} iff this is a concrete nucleotide. + */ + public boolean isStandard() { + return isStandard; + } + + /** + * Checks whether the nucleotide refer to an ambiguous base. + * @return {@code true} iff this is an ambiguous nucleotide. 
+ */ + public boolean isAmbiguous() { + return !isStandard && this != INVALID; + } + + public boolean isValid() { + return this != INVALID; + } + + /** + * Checks whether this nucleotide code encloses all possible nucleotides for another code. + * @param other the other nucleotide to compare to. + * @return {@code true} iff any nucleotide in {@code other} is enclosed in this code. + */ + public boolean includes(final Nucleotide other) { + Utils.nonNull(other); + return other != INVALID && (mask & other.mask) == other.mask; + } + + public boolean includes(final byte b) { + return includes(decode(b)); + } + + public Nucleotide intersect(final Nucleotide other) { + return maskToValue[mask & other.mask]; + } + + /** + * Checks whether to base encodings make reference to the same {@link #Nucleotide} + * instance regardless of their case. + *

+ * This method is a shorthand for: + *

{@link #decode}(a).{@link #same(Nucleotide) same}({@link #decode}(b)) 
. + *

+ * + *

+ * The order of the inputs is not relevant, therefore {@code same(a, b) == same(b, a)} for any + * given {@code a} and {@code b}. + *

+ *

+ * Notice that if either or both input bases make reference to an invalid nucleotide (i.e.

 {@link #decode}(x) == {@link #INVALID}),
+     *      this method will return {@code false} even if {@code a == b}.
+     *  

+     * @param a the first base to compare (however order is not relevant).
+     * @param b the second base to compare (however order is not relevant).
+     * @return {@code true} iff {@code decode(a).same(decode(b))}; bytes outside the ASCII range
+     *         never match because they map to {@link #INVALID}.
+     */
+    public static boolean same(final byte a, final byte b) {
+        // Java bytes are signed: mask to 0..255 so that a base >= 0x80 indexes the 256-entry
+        // lookup table instead of throwing ArrayIndexOutOfBoundsException on a negative index.
+        final Nucleotide first = baseToValue[a & 0xFF];
+        return first != INVALID && first == baseToValue[b & 0xFF];
+    }
+
+    /**
+     * Checks whether this and another {@link #Nucleotide} make reference to the same nucleotide(s).
+     *

+ * In contrast with {@link #equals}, this method will return {@code false} if any of the two, this + * or the input nucleotide is the {@link #INVALID} enum value. So even

{@link #INVALID}.same({@link #INVALID})
+ * will return {@code false}. + *

+ * + * @param other the other nucleotide. + * @return {@code true} iff this and the input nucleotide make reference to the same nucleotides. + */ + public boolean same(final Nucleotide other) { + return this == other && this != INVALID; + } + + /** + * Returns the complement nucleotide code for this one. + *

+ * For ambiguous nucleotide codes, this will return the ambiguous code that encloses the complement of + * each possible nucleotide in this code. + *

+ *

+ * The complement of the {@link #INVALID} nucleotide is itself. + *

+ * @return never {@code null}. + */ + public Nucleotide complement() { + if (complement == null) { + final int complementMask = ((mask & A.mask) != 0 ? T.mask : 0) + | ((mask & T.mask) != 0 ? A.mask : 0) + | ((mask & C.mask) != 0 ? G.mask : 0) + | ((mask & G.mask) != 0 ? C.mask : 0); + complement = maskToValue[complementMask]; + } + return complement; } /** - * Checks whether the nucleotide refer to a concrete (rather than ambiguous) base. - * @return + * Returns the complement for a base code. + *

+ * When an invalid base is provided this method will return the default encoding for the {@link #INVALID} nucleotide. + *

+ * @param b the input base + * @param upperCase whether to return the uppercase ({@code true}) or the lower case ({@code false}) byte encoding. + * @return the complement of the input. */ - public boolean isConcrete() { - return ordinal() < N.ordinal(); + public static byte complement(final byte b, final boolean upperCase) { + final Nucleotide value = decode(b); + final Nucleotide compl = value.complement(); + return compl.encodeAsByte(upperCase); } /** - * Helper class to count the number of occurrences of each nucleotide in + * Returns the complement for a base code. + *

+ * The case of the output will match the case of the input. + *

+ *

+ * When an invalid base is provided this method will return the default encoding for the {@link #INVALID} nucleotide. + *

+ * @param b the input base + * @return the complement of the input. + */ + public static byte complement(final byte b) { + return complement(b, Character.isUpperCase(b)); + } + + /** + * Returns the instance that would include all possible transition mutations from this one. + * @return never {@code null}. + */ + public Nucleotide transition() { + if (transition == null) { + final int transitionMask = ((mask & A.mask) != 0 ? G.mask : 0) + | ((mask & G.mask) != 0 ? A.mask : 0) + | ((mask & C.mask) != 0 ? T.mask : 0) + | ((mask & T.mask) != 0 ? C.mask : 0); + transition = maskToValue[transitionMask]; + } + return transition; + } + + /** + * Returns the instance that would include all possible tranversion mutations from nucleotides included + * in this one. + * @return never {@code null}. + */ + public Nucleotide transversion() { + if (transversion == null) { + final int transversionMask = ((mask & PURINE.mask) != 0 ? PYRIMIDINE.mask : 0) + | ((mask & PYRIMIDINE.mask) != 0 ? PURINE.mask : 0); + transversion = maskToValue[transversionMask]; + } + return transversion; + } + + /** + * Transvertion mutation toward a strong or a weak base. + *

+ * This method provides a non-ambiguous alternative to {@link #transversion()} for + * concrete nucleotides. + *

+ * + * @param strong whether the result should be a strong ({@code S: G, C}) or weak ({@code W: A, T}) nucleotide(s). + * @return nucleotides that may emerged from such a transversion. + */ + public Nucleotide transversion(final boolean strong) { + return transversion().intersect(strong ? STRONG : WEAK); + } + + /** + * Helper class to count the number of occurrences of each nucleotide code in * a sequence. */ - public static class Counter { + public static final class Counter { + private final long[] counts; /** * Creates a new counter with all counts set to 0. */ public Counter() { - counts = new long[Nucleotide.values().length]; + counts = new long[NUMBER_OF_CONSTANTS]; } /** @@ -115,7 +359,7 @@ public void add(final Nucleotide nucleotide) { * @throws IllegalArgumentException if {@code base} is {@code negative}. */ public void add(final byte base) { - add(valueOf(base)); + add(decode(base)); } /** @@ -135,7 +379,7 @@ public long get(final Nucleotide nucleotide) { * @throws IllegalArgumentException if {@code bases} are null or * it contains negative values. */ - public void addAll(final byte[] bases) { + public final void addAll(final byte ... bases) { Utils.nonNull(bases); for (final byte base : bases) { add(base); @@ -149,6 +393,10 @@ public void clear() { Arrays.fill(counts, 0); } + /** + * Return the total count of all nucleotide constants. + * @return 0 or greater. 
+ */ public long sum() { return LongStream.of(counts).sum(); } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/reference/FastaReferenceWriter.java b/src/main/java/org/broadinstitute/hellbender/utils/reference/FastaReferenceWriter.java index 71864625631..b81be29cf8f 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/reference/FastaReferenceWriter.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/reference/FastaReferenceWriter.java @@ -315,7 +315,7 @@ private static void checkSequenceBases(final byte[] bases, final int offset, fin final int to = offset + length; for (int i = offset; i < to; i++) { final byte b = bases[i]; - if (Nucleotide.valueOf(b) == Nucleotide.INVALID) { + if (!Nucleotide.decode(b).isValid()) { throw new IllegalArgumentException( "the input sequence contains invalid base calls like: " + StringUtils.escape(""+ (char) b)); } @@ -541,7 +541,7 @@ private void writeDictEntry() { * @param bases array containing the bases to be added. * @return this instance. * @throws IllegalArgumentException if {@bases} is {@code null} or - * the input array contains invalid bases (as assessed by: {@link Nucleotide#valueOf(byte)}). + * the input array contains invalid bases (as assessed by: {@link Nucleotide#decode(byte)}). * @throws IllegalStateException if no sequence was started or the writer is already closed. * @throws IOException if such exception is throw when writing in any of the outputs. */ @@ -560,7 +560,7 @@ public FastaReferenceWriter appendBases(final byte[] bases) * @return this instance. * @throws IllegalArgumentException if {@bases} is {@code null} or * {@code offset} and {@code length} do not entail a valid range in {@code bases} or - * that range in {@base} contain invalid bases (as assessed by: {@link Nucleotide#valueOf(byte)}). + * that range in {@base} contain invalid bases (as assessed by: {@link Nucleotide#decode(byte)}). 
* @throws IllegalStateException if no sequence was started or the writer is already closed. * @throws IOException if such exception is throw when writing in any of the outputs. */ diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelEngineUnitTest.java index 3faa174b8a0..95a0882a28c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelEngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelEngineUnitTest.java @@ -395,7 +395,7 @@ private Histogram createRefHistograms(final String refContext, final in */ private List> createDepthOneAltHistograms(final String refContext, final int depth, final int numExamples) { final List> altComputationalHistograms = new ArrayList<>(F1R2FilterConstants.numAltHistogramsPerContext); - for (final Nucleotide altAllele : Nucleotide.REGULAR_BASES){ + for (final Nucleotide altAllele : Nucleotide.STANDARD_DNA_BASES){ if (altAllele == F1R2FilterUtils.getMiddleBase(refContext)){ continue; } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/NucleotideUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/NucleotideUnitTest.java index a3461a72202..cb7c8a8b06f 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/NucleotideUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/NucleotideUnitTest.java @@ -19,65 +19,276 @@ public class NucleotideUnitTest { private static final int MAX_RANDOM_SEQ_LENGTH = 100; private static final int NUMBER_OF_RANDOM_SEQUENCES = 10; - @Test - public void testToBase() { - Assert.assertEquals(Nucleotide.A.toBase(), (byte)'A'); - Assert.assertEquals(Nucleotide.C.toBase(), (byte)'C'); - Assert.assertEquals(Nucleotide.G.toBase(), (byte)'G'); - 
Assert.assertEquals(Nucleotide.N.toBase(), (byte)'N'); - Assert.assertEquals(Nucleotide.T.toBase(), (byte)'T'); - Assert.assertEquals(Nucleotide.X.toBase(), (byte)'X'); + @Test(dataProvider = "values") + public void testEncodeAsByte(final Nucleotide nuc) { + // Will always use the first letter of the constant as the one byte encoding. + final char firstLetter = nuc.name().charAt(0); + final byte expectedLowerEncoding = (byte) Character.toLowerCase(firstLetter); + final byte expectedUpperEncoding = (byte) Character.toUpperCase(firstLetter); + Assert.assertEquals(nuc.encodeAsByte(), expectedUpperEncoding); // by default is upper case. + Assert.assertEquals(nuc.encodeAsByte(true), expectedUpperEncoding); + Assert.assertEquals(nuc.encodeAsByte(false), expectedLowerEncoding); } - @Test - public void testIsConcrete() { - for (final Nucleotide nuc : Nucleotide.values()) { - switch (nuc) { - case A: - case C: - case T: - case G: - Assert.assertTrue(nuc.isConcrete()); - break; - default: - Assert.assertFalse(nuc.isConcrete()); - } + @Test(dataProvider = "values") + public void testIsConcrete(final Nucleotide nuc) { + switch (nuc) { + case A: + case C: + case T: + case G: + Assert.assertTrue(nuc.isStandard()); + break; + default: + Assert.assertFalse(nuc.isStandard()); } } - @Test(expectedExceptions = UnsupportedOperationException.class) - public void testToBaseOnInvalid() { - Nucleotide.INVALID.toBase(); + @Test(dataProvider = "values") + public void testIsAmbiguous(final Nucleotide nuc) { + switch (nuc) { + case X: + case A: + case C: + case T: + case G: + Assert.assertFalse(nuc.isAmbiguous()); + break; + default: + Assert.assertTrue(nuc.isAmbiguous()); + } + } + + @Test(dataProvider = "values") + public void testIsValid(final Nucleotide nuc) { + switch (nuc) { + case X: + Assert.assertFalse(nuc.isValid()); + break; + default: + Assert.assertTrue(nuc.isValid()); + } } @Test - public void testValueOfBase() { + public void testDecode() { for (byte i = 0; i >= 0; i++) { final 
Nucleotide expected; switch (i) { case 'a': - case 'A': expected = Nucleotide.A; break; + case 'A': + expected = Nucleotide.A; + break; case 'c': - case 'C': expected = Nucleotide.C; break; + case 'C': + expected = Nucleotide.C; + break; case 'g': - case 'G': expected = Nucleotide.G; break; + case 'G': + expected = Nucleotide.G; + break; case 't': case 'T': case 'u': - case 'U': expected = Nucleotide.T; break; + case 'U': + expected = Nucleotide.T; + break; case 'n': - case 'N': expected = Nucleotide.N; break; + case 'N': + expected = Nucleotide.N; + break; case 'x': - case 'X': expected = Nucleotide.X; break; - default : expected = Nucleotide.INVALID; + case 'X': + expected = Nucleotide.X; + break; + case 'r': + case 'R': + expected = Nucleotide.R; + break; + case 'b': + case 'B': + expected = Nucleotide.B; + break; + case 'v': + case 'V': + expected = Nucleotide.V; + break; + case 'y': + case 'Y': + expected = Nucleotide.Y; + break; + case 's': + case 'S': + expected = Nucleotide.S; + break; + case 'w': + case 'W': + expected = Nucleotide.W; + break; + case 'k': + case 'K': + expected = Nucleotide.K; + break; + case 'm': + case 'M': + expected = Nucleotide.M; + break; + case 'd': + case 'D': + expected = Nucleotide.D; + break; + case 'h': + case 'H': + expected = Nucleotide.H; + break; + default: + expected = Nucleotide.X; + } + Assert.assertSame(Nucleotide.decode(i), expected, "Failed with base " + i + " returning nucleotide " + Nucleotide.decode(i)); + Assert.assertSame(Nucleotide.decode((char)i), expected, "Failed with base " + i + " returning nucleotide " + Nucleotide.decode((char)i)); + } + } + + @Test(dataProvider = "values") + public void testIncludes(final Nucleotide nuc) { + if (nuc.isStandard()) { + for (final Nucleotide other : Nucleotide.values()) { + if (other.isStandard()) { + Assert.assertEquals(nuc.includes(other), nuc == other); + Assert.assertEquals(nuc.includes(other.encodeAsByte()), nuc == other); + 
Assert.assertEquals(nuc.includes(other.encodeAsByte(false)), nuc == other); + } else { + Assert.assertFalse(nuc.includes(other)); + Assert.assertFalse(nuc.includes(other.encodeAsByte())); + Assert.assertFalse(nuc.includes(other.encodeAsByte(false))); + } + } + } else if (nuc.isAmbiguous()) { + for (final Nucleotide other : Nucleotide.values()) { + final boolean thisA = nuc.includes(Nucleotide.A); + final boolean thisC = nuc.includes(Nucleotide.C); + final boolean thisG = nuc.includes(Nucleotide.G); + final boolean thisT = nuc.includes(Nucleotide.T); + final boolean otherA = other.includes(Nucleotide.A); + final boolean otherC = other.includes(Nucleotide.C); + final boolean otherG = other.includes(Nucleotide.G); + final boolean otherT = other.includes(Nucleotide.T); + final boolean includes = other.isValid() && (thisA == otherA || thisA) + && (thisC == otherC || thisC) + && (thisG == otherG || thisG) + && (thisT == otherT || thisT); + Assert.assertEquals(nuc.includes(other), includes, "" + nuc + " " + other); + Assert.assertEquals(nuc.includes(other.encodeAsByte()), includes); + Assert.assertEquals(nuc.includes(other.encodeAsByte(false)), includes); + } + } else { // invalid + for (final Nucleotide other : Nucleotide.values()) { + Assert.assertFalse(nuc.includes(other)); + Assert.assertFalse(nuc.includes(other.encodeAsByte())); + Assert.assertFalse(nuc.includes(other.encodeAsByte(false))); } - Assert.assertSame(Nucleotide.valueOf(i), expected, "Failed with base " + i + " returning nucleotide " + Nucleotide.valueOf(i)); + } + + } + + @Test(dataProvider = "values") + public void testIntersects(final Nucleotide nuc) { + final boolean thisA = nuc.includes(Nucleotide.A); + final boolean thisC = nuc.includes(Nucleotide.C); + final boolean thisG = nuc.includes(Nucleotide.G); + final boolean thisT = nuc.includes(Nucleotide.T); + for (final Nucleotide other : Nucleotide.values()) { + final boolean otherA = other.includes(Nucleotide.A); + final boolean otherC = 
other.includes(Nucleotide.C); + final boolean otherG = other.includes(Nucleotide.G); + final boolean otherT = other.includes(Nucleotide.T); + final Nucleotide intersect = nuc.intersect(other); + Assert.assertNotNull(intersect); + Assert.assertEquals(intersect.includes(Nucleotide.A), thisA && otherA); + Assert.assertEquals(intersect.includes(Nucleotide.C), thisC && otherC); + Assert.assertEquals(intersect.includes(Nucleotide.G), thisG && otherG); + Assert.assertEquals(intersect.includes(Nucleotide.T), thisT && otherT); + } + } + + @Test(dataProvider = "values") + public void testComplement(final Nucleotide nuc) { + final boolean thisA = nuc.includes(Nucleotide.A); + final boolean thisC = nuc.includes(Nucleotide.C); + final boolean thisG = nuc.includes(Nucleotide.G); + final boolean thisT = nuc.includes(Nucleotide.T); + final Nucleotide complement = nuc.complement(); + final boolean compA = complement.includes(Nucleotide.A); + final boolean compC = complement.includes(Nucleotide.C); + final boolean compG = complement.includes(Nucleotide.G); + final boolean compT = complement.includes(Nucleotide.T); + final String errorMessage = "Failure with " + nuc + " result in complement " + complement; + Assert.assertEquals(compA, thisT, errorMessage); + Assert.assertEquals(compT, thisA, errorMessage); + Assert.assertEquals(compC, thisG, errorMessage); + Assert.assertEquals(compG, thisC, errorMessage); + } + + @Test(dataProvider = "values") + public void testTransition(final Nucleotide nuc) { + final boolean thisA = nuc.includes(Nucleotide.A); + final boolean thisC = nuc.includes(Nucleotide.C); + final boolean thisG = nuc.includes(Nucleotide.G); + final boolean thisT = nuc.includes(Nucleotide.T); + final Nucleotide trans = nuc.transition(); + final boolean tranA = trans.includes(Nucleotide.A); + final boolean tranC = trans.includes(Nucleotide.C); + final boolean tranG = trans.includes(Nucleotide.G); + final boolean tranT = trans.includes(Nucleotide.T); + final String errorMessage 
= "Failure with " + nuc + " result in transition " + trans; + Assert.assertEquals(tranA, thisG, errorMessage); + Assert.assertEquals(tranG, thisA, errorMessage); + Assert.assertEquals(tranC, thisT, errorMessage); + Assert.assertEquals(tranT, thisC, errorMessage); + } + + @Test(dataProvider = "values") + public void testTransversion(final Nucleotide nuc) { + final boolean thisA = nuc.includes(Nucleotide.A); + final boolean thisC = nuc.includes(Nucleotide.C); + final boolean thisG = nuc.includes(Nucleotide.G); + final boolean thisT = nuc.includes(Nucleotide.T); + final Nucleotide trans = nuc.transversion(); + final boolean tranA = trans.includes(Nucleotide.A); + final boolean tranC = trans.includes(Nucleotide.C); + final boolean tranG = trans.includes(Nucleotide.G); + final boolean tranT = trans.includes(Nucleotide.T); + final String errorMessage = "Failure with " + nuc + " result in transversion " + trans; + Assert.assertEquals(tranA, thisC || thisT, errorMessage); + Assert.assertEquals(tranG, thisC || thisT, errorMessage); + Assert.assertEquals(tranC, thisA || thisG, errorMessage); + Assert.assertEquals(tranT, thisA || thisG, errorMessage); + final Nucleotide transStrong = nuc.transversion(true); + final Nucleotide transWeak = nuc.transversion(false); + Assert.assertTrue(trans.includes(transStrong) || trans == Nucleotide.X || transStrong == Nucleotide.X); + Assert.assertTrue(trans.includes(transWeak) || trans == Nucleotide.X || transStrong == Nucleotide.X); + Assert.assertSame(transStrong.intersect(transWeak), Nucleotide.X); + Assert.assertEquals(transStrong.includes(Nucleotide.C), tranC); + Assert.assertEquals(transStrong.includes(Nucleotide.G), tranG); + Assert.assertEquals(transWeak.includes(Nucleotide.A), tranA); + Assert.assertEquals(transWeak.includes(Nucleotide.T), tranT); + } + + @Test(dataProvider = "values") + public void testSame(final Nucleotide nuc) { + for (final Nucleotide other : Nucleotide.values()) { + final boolean reallyTheSame = nuc != 
Nucleotide.INVALID && nuc == other; + Assert.assertEquals(nuc.same(other), reallyTheSame); + Assert.assertEquals(Nucleotide.same(nuc.encodeAsByte(), other.encodeAsByte()), reallyTheSame); + Assert.assertEquals(Nucleotide.same(nuc.encodeAsByte(false), other.encodeAsByte()), reallyTheSame); + Assert.assertEquals(Nucleotide.same(nuc.encodeAsByte(), other.encodeAsByte(false)), reallyTheSame); + Assert.assertEquals(Nucleotide.same(nuc.encodeAsByte(false), other.encodeAsByte(false)), reallyTheSame); } } @Test(expectedExceptions = IllegalArgumentException.class) public void testValueOfNegativeBase() { - Nucleotide.valueOf((byte) -10); + Nucleotide.decode((byte) -10); } @Test @@ -88,13 +299,13 @@ public void testNucleotideCounterInit() { } } - @Test(dependsOnMethods = "testValueOfBase", dataProvider = "testSequences") + @Test(dependsOnMethods = "testDecode", dataProvider = "testSequences") public void testAddingOneByOne(final byte[] bases) { final Nucleotide.Counter subject = new Nucleotide.Counter(); final Map shadow = new HashMap<>(Nucleotide.values().length); for (final byte base : bases) { subject.add(base); - final Nucleotide nuc = Nucleotide.valueOf(base); + final Nucleotide nuc = Nucleotide.decode(base); shadow.put(nuc, shadow.getOrDefault(nuc, 0) + 1); for (final Nucleotide n : Nucleotide.values()) { Assert.assertEquals(subject.get(n), (long) shadow.getOrDefault(n, 0)); @@ -103,12 +314,12 @@ public void testAddingOneByOne(final byte[] bases) { Assert.assertEquals(subject.sum(), shadow.values().stream().mapToLong(l -> l).sum()); } - @Test(dependsOnMethods = "testValueOfBase", dataProvider = "testSequences") + @Test(dependsOnMethods = "testDecode", dataProvider = "testSequences") public void testAddingAllAtOnce(final byte[] bases) { final Nucleotide.Counter subject = new Nucleotide.Counter(); final Map shadow = new HashMap<>(Nucleotide.values().length); for (final byte base : bases) { - final Nucleotide nuc = Nucleotide.valueOf(base); + final Nucleotide nuc = 
Nucleotide.decode(base); shadow.put(nuc, shadow.getOrDefault(nuc, 0) + 1); } subject.addAll(bases); @@ -121,24 +332,21 @@ public void testAddingAllAtOnce(final byte[] bases) { @Test(expectedExceptions = IllegalArgumentException.class) public void testAddingAllAtOnceOnANullArray() { final Nucleotide.Counter subject = new Nucleotide.Counter(); - subject.addAll(null); + subject.addAll((byte[]) null); } - @Test(expectedExceptions = IllegalArgumentException.class) public void testAddingAllAtOnceWithNegativeBases() { final Nucleotide.Counter subject = new Nucleotide.Counter(); - subject.addAll(new byte[] { 'a', 'A', -10, 'C' } ); + subject.addAll(new byte[]{'a', 'A', -10, 'C'}); } - - - @Test(dependsOnMethods = "testValueOfBase", dataProvider = "testSequences") + @Test(dependsOnMethods = "testDecode", dataProvider = "testSequences") public void testClear(final byte[] bases) { final Nucleotide.Counter subject = new Nucleotide.Counter(); final Map shadow = new HashMap<>(Nucleotide.values().length); for (final byte base : bases) { - final Nucleotide nuc = Nucleotide.valueOf(base); + final Nucleotide nuc = Nucleotide.decode(base); shadow.put(nuc, shadow.getOrDefault(nuc, 0) + 1); } subject.addAll(bases); @@ -152,22 +360,87 @@ public void testClear(final byte[] bases) { Assert.assertEquals(subject.sum(), 0); } + @Test + public void testUracilSameAsThymine() { + Assert.assertSame(Nucleotide.U, Nucleotide.T); + } + + @Test + public void testLongFormNames() { + Assert.assertSame(Nucleotide.ADENINE, Nucleotide.A); + Assert.assertSame(Nucleotide.THYMINE, Nucleotide.T); + Assert.assertSame(Nucleotide.GUANINE, Nucleotide.G); + Assert.assertSame(Nucleotide.CYTOSINE, Nucleotide.C); + Assert.assertSame(Nucleotide.URACIL, Nucleotide.U); + Assert.assertSame(Nucleotide.ANY, Nucleotide.N); + Assert.assertSame(Nucleotide.UNKNOWN, Nucleotide.N); + Assert.assertSame(Nucleotide.PURINE, Nucleotide.R); + Assert.assertSame(Nucleotide.PYRIMIDINE, Nucleotide.Y); + 
Assert.assertSame(Nucleotide.INVALID, Nucleotide.X); + Assert.assertSame(Nucleotide.STRONG, Nucleotide.S); + Assert.assertSame(Nucleotide.WEAK, Nucleotide.W); + Assert.assertSame(Nucleotide.KETO, Nucleotide.K); + Assert.assertSame(Nucleotide.AMINO, Nucleotide.M); + } + + @Test + public void testLongFormNamesForTypos() { + // We avoid a direct comparison of the constant value (rather than indirectly using its name) + // because that would typos and here the names matter and are unlikely to change even in the long term. + Assert.assertSame(constantNameToInstance("ADENINE"), Nucleotide.A); + Assert.assertSame(constantNameToInstance("THYMINE"), Nucleotide.T); + Assert.assertSame(constantNameToInstance("GUANINE"), Nucleotide.G); + Assert.assertSame(constantNameToInstance("CYTOSINE"), Nucleotide.C); + Assert.assertSame(constantNameToInstance("URACIL"), Nucleotide.U); + Assert.assertSame(constantNameToInstance("ANY"), Nucleotide.N); + Assert.assertSame(constantNameToInstance("UNKNOWN"), Nucleotide.N); + Assert.assertSame(constantNameToInstance("PURINE"), Nucleotide.R); + Assert.assertSame(constantNameToInstance("PYRIMIDINE"), Nucleotide.Y); + Assert.assertSame(constantNameToInstance("INVALID"), Nucleotide.X); + Assert.assertSame(constantNameToInstance("STRONG"), Nucleotide.S); + Assert.assertSame(constantNameToInstance("WEAK"), Nucleotide.W); + Assert.assertSame(constantNameToInstance("KETO"), Nucleotide.K); + Assert.assertSame(constantNameToInstance("AMINO"), Nucleotide.M); + } + + private Nucleotide constantNameToInstance(final String name) { + try { + return (Nucleotide) Nucleotide.class.getField(name).get(null); + } catch (final IllegalAccessException e) { + Assert.fail("Long name constant " + name + " is not accessible"); + } catch (final NoSuchFieldException e) { + Assert.fail("Long name constant " + name + " does not exists"); + } catch (final ClassCastException e) { + Assert.fail("Long name constant " + name + " so not typed as " + Nucleotide.class.getName()); + } 
+ throw new IllegalStateException("unreachable code"); + } + @DataProvider(name = "testSequences") public Object[][] testSequences() { final List result = new ArrayList<>(); // We add non random trivial sequences: - result.add(new Object[] { new byte[0] }); + result.add(new Object[]{new byte[0]}); for (final Nucleotide nuc : Nucleotide.values()) { if (nuc == Nucleotide.INVALID) { continue; } - result.add( new Object[] { new byte[] { nuc.toBase() } }); - result.add( new Object[] { Utils.repeatBytes( nuc.toBase(), MIN_RANDOM_SEQ_LENGTH) }); + result.add(new Object[]{new byte[]{nuc.encodeAsByte()}}); + result.add(new Object[]{Utils.repeatBytes(nuc.encodeAsByte(), MIN_RANDOM_SEQ_LENGTH)}); } for (int i = 0; i < NUMBER_OF_RANDOM_SEQUENCES; i++) { final int length = random.nextInt(MAX_RANDOM_SEQ_LENGTH - MIN_RANDOM_SEQ_LENGTH + 1) + MIN_RANDOM_SEQ_LENGTH; final byte[] base = randomDNA.nextBases(length); - result.add(new Object[] { base }); + result.add(new Object[]{base}); + } + return result.toArray(new Object[result.size()][]); + } + + @DataProvider(name = "values") + public Object[][] values() { + final List result = new ArrayList<>(Nucleotide.values().length); + for (final Nucleotide nuc : Nucleotide.values()) { + result.add(new Object[]{nuc}); } return result.toArray(new Object[result.size()][]); } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/RandomDNA.java b/src/test/java/org/broadinstitute/hellbender/utils/RandomDNA.java index 45dbfb5aebd..6459672acad 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/RandomDNA.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/RandomDNA.java @@ -58,10 +58,10 @@ public final class RandomDNA { protected final Random random; private final byte[] codeToBase = new byte[] { - Nucleotide.A.toBase(), - Nucleotide.C.toBase(), - Nucleotide.G.toBase(), - Nucleotide.T.toBase() + Nucleotide.A.encodeAsByte(), + Nucleotide.C.encodeAsByte(), + Nucleotide.G.encodeAsByte(), + Nucleotide.T.encodeAsByte() }; 
private final byte[] nextBases; diff --git a/src/test/java/org/broadinstitute/hellbender/utils/RandomDNAUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/RandomDNAUnitTest.java index 6b4add1ce9d..c8eace40c09 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/RandomDNAUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/RandomDNAUnitTest.java @@ -5,7 +5,6 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation; -import org.apache.commons.math3.stat.inference.ChiSquareTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -150,8 +149,8 @@ private void assertFastaFileAndDictMatch(final File fastaFile, final int basesPe final String lineBases = line.trim(); final String nextLine = reader.readLine(); for (final byte base : lineBases.getBytes()) { - final Nucleotide nuc = Nucleotide.valueOf(base); - Assert.assertTrue(nuc.isConcrete()); + final Nucleotide nuc = Nucleotide.decode(base); + Assert.assertTrue(nuc.isStandard()); frequencies.add(nuc); } if (nextLine != null && !nextLine.matches("^>.*$")){