diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCounts.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCounts.java
index 6b63c8f70cf..56339b68296 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCounts.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCounts.java
@@ -149,6 +149,6 @@ public Object onTraversalSuccess() {
@Override
public void apply(AlignmentContext alignmentContext, ReferenceContext referenceContext, FeatureContext featureContext) {
final byte refAsByte = referenceContext.getBase();
- allelicCountCollector.collectAtLocus(Nucleotide.valueOf(refAsByte), alignmentContext.getBasePileup(), alignmentContext.getLocation(), minimumBaseQuality);
+ allelicCountCollector.collectAtLocus(Nucleotide.decode(refAsByte), alignmentContext.getBasePileup(), alignmentContext.getLocation(), minimumBaseQuality);
}
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PreprocessIntervals.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PreprocessIntervals.java
index 3af7c4d93d6..dc33c63ad09 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PreprocessIntervals.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PreprocessIntervals.java
@@ -202,7 +202,7 @@ private static IntervalList generateBins(final IntervalList preparedIntervalList
private static IntervalList filterBinsContainingOnlyNs(final IntervalList unfilteredBins, final ReferenceDataSource reference) {
final IntervalList bins = new IntervalList(reference.getSequenceDictionary());
for (final Interval unfilteredBin : unfilteredBins) {
- if (!Utils.stream(reference.query(new SimpleInterval(unfilteredBin))).allMatch(b -> b == Nucleotide.N.toBase())) {
+ if (!Utils.stream(reference.query(new SimpleInterval(unfilteredBin))).allMatch(b -> Nucleotide.decode(b) == Nucleotide.N)) {
bins.add(unfilteredBin);
}
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollection.java
index 6367420b20d..e1ae69e1649 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollection.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AllelicCountCollection.java
@@ -40,8 +40,8 @@ enum AllelicCountTableColumn {
final int position = dataLine.getInt(AllelicCountTableColumn.POSITION);
final int refReadCount = dataLine.getInt(AllelicCountTableColumn.REF_COUNT);
final int altReadCount = dataLine.getInt(AllelicCountTableColumn.ALT_COUNT);
- final Nucleotide refNucleotide = Nucleotide.valueOf(dataLine.get(AllelicCountTableColumn.REF_NUCLEOTIDE.name()).getBytes()[0]);
- final Nucleotide altNucleotide = Nucleotide.valueOf(dataLine.get(AllelicCountTableColumn.ALT_NUCLEOTIDE.name()).getBytes()[0]);
+ final Nucleotide refNucleotide = Nucleotide.decode(dataLine.get(AllelicCountTableColumn.REF_NUCLEOTIDE.name()).charAt(0));
+ final Nucleotide altNucleotide = Nucleotide.decode(dataLine.get(AllelicCountTableColumn.ALT_NUCLEOTIDE.name()).charAt(0));
final SimpleInterval interval = new SimpleInterval(contig, position, position);
return new AllelicCount(interval, refReadCount, altReadCount, refNucleotide, altNucleotide);
};
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/AltSiteRecord.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/AltSiteRecord.java
index 420a7ae12e1..842a5c81134 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/AltSiteRecord.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/AltSiteRecord.java
@@ -29,7 +29,7 @@ public class AltSiteRecord {
public AltSiteRecord(final String referenceContext, final int refCount, final int altCount,
final int refF1R2, final int altF1R2, final Nucleotide altAllele){
- Utils.validateArg(Nucleotide.REGULAR_BASES.contains(altAllele), "altAllele must be one of {A,C,G,T} but got " + altAllele);
+ Utils.validateArg(altAllele != null && altAllele.isStandard(), "altAllele must be one of {A,C,G,T} but got " + altAllele);
this.referenceContext = referenceContext;
this.refCount = refCount;
@@ -63,7 +63,7 @@ public AltSiteRecord(final String referenceContext, final int refCount, final in
public AltSiteRecord getReverseComplementOfRecord(){
Utils.validate(!F1R2FilterConstants.CANONICAL_KMERS.contains(referenceContext), "for consistency, don't make the " +
"revcomp record of a canonical reference context");
- final Nucleotide revCompOfAlt = Nucleotide.complement(altAllele.toBase());
+ final Nucleotide revCompOfAlt = altAllele.complement();
final int newRefF1R2 = refCount - refF1R2;
final int newAltF1R2 = altCount - altF1R2;
return new AltSiteRecord(SequenceUtil.reverseComplement(referenceContext), refCount, altCount, newRefF1R2,
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/CollectF1R2Counts.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/CollectF1R2Counts.java
index 7f24c1032f5..2b3514b6f43 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/CollectF1R2Counts.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/CollectF1R2Counts.java
@@ -154,14 +154,14 @@ public void apply(final AlignmentContext alignmentContext, final ReferenceContex
}
// If we got here, we have an alt site with a single alt base
- final Nucleotide altBase = Nucleotide.valueOf(BaseUtils.baseIndexToSimpleBase(altBaseIndex));
+ final Nucleotide altBase = Nucleotide.decode(BaseUtils.baseIndexToSimpleBase(altBaseIndex));
final int refCount = baseCounts[refBase.ordinal()];
final int altCount = baseCounts[altBaseIndex];
Utils.validate(altCount > 0, "We must have a nonzero alt read but got " + altCount);
- final int refF1R2 = pileup.getNumberOfElements(pe -> Nucleotide.valueOf(pe.getBase()) == refBase && ReadUtils.isF1R2(pe.getRead()));
- final int altF1R2 = pileup.getNumberOfElements(pe -> Nucleotide.valueOf(pe.getBase()) == altBase && ReadUtils.isF1R2(pe.getRead()));
+ final int refF1R2 = pileup.getNumberOfElements(pe -> Nucleotide.decode(pe.getBase()) == refBase && ReadUtils.isF1R2(pe.getRead()));
+ final int altF1R2 = pileup.getNumberOfElements(pe -> Nucleotide.decode(pe.getBase()) == altBase && ReadUtils.isF1R2(pe.getRead()));
if (altCount == 1) {
final ReadOrientation type = altF1R2 == 1 ? F1R2 : F2R1;
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/DepthOneHistograms.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/DepthOneHistograms.java
index b7fb44d2fb9..d5a8a96bcc7 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/DepthOneHistograms.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/DepthOneHistograms.java
@@ -24,9 +24,9 @@ public DepthOneHistograms(final int maxDepth) {
// Initialize, for each reference context, the (Alt Allele, Artifact Type) -> Histogram map
F1R2FilterConstants.ALL_KMERS.forEach(context -> {
- map.put(context, new HashMap<>((Nucleotide.REGULAR_BASES.size() - 1) * ReadOrientation.SIZE));
+ map.put(context, new HashMap<>((Nucleotide.STANDARD_DNA_BASES.size() - 1) * ReadOrientation.SIZE));
- for (Nucleotide altAllele : Nucleotide.REGULAR_BASES) {
+ for (Nucleotide altAllele : Nucleotide.STANDARD_DNA_BASES) {
// Skip e.g. AGT -> AGT because G is not an alt allele
if (altAllele == F1R2FilterUtils.getMiddleBase(context)) {
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/F1R2FilterConstants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/F1R2FilterConstants.java
index b25cb5c7da4..7c964d4129d 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/F1R2FilterConstants.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/F1R2FilterConstants.java
@@ -40,7 +40,7 @@ public class F1R2FilterConstants {
// We combine all sites of depths above this value in the last bin of the histogram
static final int DEFAULT_MAX_DEPTH = 200;
- static final int numAltHistogramsPerContext = (Nucleotide.REGULAR_BASES.size() - 1) * (ReadOrientation.values().length);
+ static final int numAltHistogramsPerContext = (Nucleotide.STANDARD_DNA_BASES.size() - 1) * (ReadOrientation.values().length);
public static Integer[] getEmptyBins(final int maxDepth){
return IntStream.rangeClosed(1, maxDepth).boxed().toArray( Integer[]::new );
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModel.java
index 0f96d902a5c..b371a4fe450 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModel.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModel.java
@@ -180,7 +180,7 @@ public static List
- * This enumeration not only contains concrete nucleotides, but also
+ * This enumeration not only contains standard (non-ambiguous) nucleotides, but also
* values to represent ambiguous and invalid codes.
*
* The {@link #INVALID} nucleotide does not have an actual base then resulting in an exception.
*
+ * This method is a shorthand for:
+ * {@link #decode}(a){@link #same(Nucleotide) same}({@link #decode}(b))
.
+ *
+ * The order of the inputs is not relevant, therefore {@code same(a, b) == same(b, a)} for any + * given {@code a} and {@code b}. + *
+ *+ * Notice that if either or both input bases make reference to an invalid nucleotide (i.e.
{@link #decode}(x) == {@link #INVALID}), + * this method will return {@code false} even if {@code a == b}. + * + * @param a the first base to compare (however order is not relevant). + * @param b the second base to compare (however order is not relevant). + * @return {@code true} iff {@link #decode}(a).{@link #same(Nucleotide) same}({@link #decode}(b)). + */ + public static boolean same(final byte a, final byte b) { + return baseToValue[a] == baseToValue[b] && baseToValue[a] != INVALID; + } + + /** + * Checks whether this and another {@link Nucleotide} make reference to the same nucleotide(s). + *+ * In contrast with {@link #equals}, this method will return {@code false} if either of the two, this + * or the input nucleotide, is the {@link #INVALID} enum value. So even
{@link #INVALID}.same({@link #INVALID})+ * will return {@code false}. + * + * + * @param other the other nucleotide. + * @return {@code true} iff this and the input nucleotide make reference to the same nucleotides. + */ + public boolean same(final Nucleotide other) { + return this == other && this != INVALID; + } + + /** + * Returns the complement nucleotide code for this one. + *+ * For ambiguous nucleotide codes, this will return the ambiguous code that encloses the complement of + * each possible nucleotide in this code. + *
+ *+ * The complement of the {@link #INVALID} nucleotide is itself. + *
+ * @return never {@code null}. + */ + public Nucleotide complement() { + if (complement == null) { + final int complementMask = ((mask & A.mask) != 0 ? T.mask : 0) + | ((mask & T.mask) != 0 ? A.mask : 0) + | ((mask & C.mask) != 0 ? G.mask : 0) + | ((mask & G.mask) != 0 ? C.mask : 0); + complement = maskToValue[complementMask]; + } + return complement; } /** - * Checks whether the nucleotide refer to a concrete (rather than ambiguous) base. - * @return + * Returns the complement for a base code. + *+ * When an invalid base is provided this method will return the default encoding for the {@link #INVALID} nucleotide. + *
+ * @param b the input base + * @param upperCase whether to return the uppercase ({@code true}) or the lower case ({@code false}) byte encoding. + * @return the complement of the input. */ - public boolean isConcrete() { - return ordinal() < N.ordinal(); + public static byte complement(final byte b, final boolean upperCase) { + final Nucleotide value = decode(b); + final Nucleotide compl = value.complement(); + return compl.encodeAsByte(upperCase); } /** - * Helper class to count the number of occurrences of each nucleotide in + * Returns the complement for a base code. + *+ * The case of the output will match the case of the input. + *
+ *+ * When an invalid base is provided this method will return the default encoding for the {@link #INVALID} nucleotide. + *
+ * @param b the input base + * @return the complement of the input. + */ + public static byte complement(final byte b) { + return complement(b, Character.isUpperCase(b)); + } + + /** + * Returns the instance that would include all possible transition mutations from this one. + * @return never {@code null}. + */ + public Nucleotide transition() { + if (transition == null) { + final int transitionMask = ((mask & A.mask) != 0 ? G.mask : 0) + | ((mask & G.mask) != 0 ? A.mask : 0) + | ((mask & C.mask) != 0 ? T.mask : 0) + | ((mask & T.mask) != 0 ? C.mask : 0); + transition = maskToValue[transitionMask]; + } + return transition; + } + + /** + * Returns the instance that would include all possible transversion mutations from nucleotides included + * in this one. + * @return never {@code null}. + */ + public Nucleotide transversion() { + if (transversion == null) { + final int transversionMask = ((mask & PURINE.mask) != 0 ? PYRIMIDINE.mask : 0) + | ((mask & PYRIMIDINE.mask) != 0 ? PURINE.mask : 0); + transversion = maskToValue[transversionMask]; + } + return transversion; + } + + /** + * Transversion mutation toward a strong or a weak base. + *+ * This method provides a non-ambiguous alternative to {@link #transversion()} for + * standard (non-ambiguous) nucleotides. + *
+ * + * @param strong whether the result should be a strong ({@code S: G, C}) or weak ({@code W: A, T}) nucleotide(s). + * @return nucleotides that may emerged from such a transversion. + */ + public Nucleotide transversion(final boolean strong) { + return transversion().intersect(strong ? STRONG : WEAK); + } + + /** + * Helper class to count the number of occurrences of each nucleotide code in * a sequence. */ - public static class Counter { + public static final class Counter { + private final long[] counts; /** * Creates a new counter with all counts set to 0. */ public Counter() { - counts = new long[Nucleotide.values().length]; + counts = new long[NUMBER_OF_CONSTANTS]; } /** @@ -115,7 +359,7 @@ public void add(final Nucleotide nucleotide) { * @throws IllegalArgumentException if {@code base} is {@code negative}. */ public void add(final byte base) { - add(valueOf(base)); + add(decode(base)); } /** @@ -135,7 +379,7 @@ public long get(final Nucleotide nucleotide) { * @throws IllegalArgumentException if {@code bases} are null or * it contains negative values. */ - public void addAll(final byte[] bases) { + public final void addAll(final byte ... bases) { Utils.nonNull(bases); for (final byte base : bases) { add(base); @@ -149,6 +393,10 @@ public void clear() { Arrays.fill(counts, 0); } + /** + * Return the total count of all nucleotide constants. + * @return 0 or greater. 
+ */ public long sum() { return LongStream.of(counts).sum(); } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/reference/FastaReferenceWriter.java b/src/main/java/org/broadinstitute/hellbender/utils/reference/FastaReferenceWriter.java index 71864625631..b81be29cf8f 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/reference/FastaReferenceWriter.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/reference/FastaReferenceWriter.java @@ -315,7 +315,7 @@ private static void checkSequenceBases(final byte[] bases, final int offset, fin final int to = offset + length; for (int i = offset; i < to; i++) { final byte b = bases[i]; - if (Nucleotide.valueOf(b) == Nucleotide.INVALID) { + if (!Nucleotide.decode(b).isValid()) { throw new IllegalArgumentException( "the input sequence contains invalid base calls like: " + StringUtils.escape(""+ (char) b)); } @@ -541,7 +541,7 @@ private void writeDictEntry() { * @param bases array containing the bases to be added. * @return this instance. * @throws IllegalArgumentException if {@bases} is {@code null} or - * the input array contains invalid bases (as assessed by: {@link Nucleotide#valueOf(byte)}). + * the input array contains invalid bases (as assessed by: {@link Nucleotide#decode(byte)}). * @throws IllegalStateException if no sequence was started or the writer is already closed. * @throws IOException if such exception is throw when writing in any of the outputs. */ @@ -560,7 +560,7 @@ public FastaReferenceWriter appendBases(final byte[] bases) * @return this instance. * @throws IllegalArgumentException if {@bases} is {@code null} or * {@code offset} and {@code length} do not entail a valid range in {@code bases} or - * that range in {@base} contain invalid bases (as assessed by: {@link Nucleotide#valueOf(byte)}). + * that range in {@base} contain invalid bases (as assessed by: {@link Nucleotide#decode(byte)}). 
* @throws IllegalStateException if no sequence was started or the writer is already closed. * @throws IOException if such exception is throw when writing in any of the outputs. */ diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelEngineUnitTest.java index 3faa174b8a0..95a0882a28c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelEngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/readorientation/LearnReadOrientationModelEngineUnitTest.java @@ -395,7 +395,7 @@ private HistogramcreateRefHistograms(final String refContext, final in */ private List > createDepthOneAltHistograms(final String refContext, final int depth, final int numExamples) { final List > altComputationalHistograms = new ArrayList<>(F1R2FilterConstants.numAltHistogramsPerContext); - for (final Nucleotide altAllele : Nucleotide.REGULAR_BASES){ + for (final Nucleotide altAllele : Nucleotide.STANDARD_DNA_BASES){ if (altAllele == F1R2FilterUtils.getMiddleBase(refContext)){ continue; } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/NucleotideUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/NucleotideUnitTest.java index a3461a72202..cb7c8a8b06f 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/NucleotideUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/NucleotideUnitTest.java @@ -19,65 +19,276 @@ public class NucleotideUnitTest { private static final int MAX_RANDOM_SEQ_LENGTH = 100; private static final int NUMBER_OF_RANDOM_SEQUENCES = 10; - @Test - public void testToBase() { - Assert.assertEquals(Nucleotide.A.toBase(), (byte)'A'); - Assert.assertEquals(Nucleotide.C.toBase(), (byte)'C'); - Assert.assertEquals(Nucleotide.G.toBase(), (byte)'G'); - 
Assert.assertEquals(Nucleotide.N.toBase(), (byte)'N'); - Assert.assertEquals(Nucleotide.T.toBase(), (byte)'T'); - Assert.assertEquals(Nucleotide.X.toBase(), (byte)'X'); + @Test(dataProvider = "values") + public void testEncodeAsByte(final Nucleotide nuc) { + // Will always use the first letter of the constant as the one byte encoding. + final char firstLetter = nuc.name().charAt(0); + final byte expectedLowerEncoding = (byte) Character.toLowerCase(firstLetter); + final byte expectedUpperEncoding = (byte) Character.toUpperCase(firstLetter); + Assert.assertEquals(nuc.encodeAsByte(), expectedUpperEncoding); // by default is upper case. + Assert.assertEquals(nuc.encodeAsByte(true), expectedUpperEncoding); + Assert.assertEquals(nuc.encodeAsByte(false), expectedLowerEncoding); } - @Test - public void testIsConcrete() { - for (final Nucleotide nuc : Nucleotide.values()) { - switch (nuc) { - case A: - case C: - case T: - case G: - Assert.assertTrue(nuc.isConcrete()); - break; - default: - Assert.assertFalse(nuc.isConcrete()); - } + @Test(dataProvider = "values") + public void testIsConcrete(final Nucleotide nuc) { + switch (nuc) { + case A: + case C: + case T: + case G: + Assert.assertTrue(nuc.isStandard()); + break; + default: + Assert.assertFalse(nuc.isStandard()); } } - @Test(expectedExceptions = UnsupportedOperationException.class) - public void testToBaseOnInvalid() { - Nucleotide.INVALID.toBase(); + @Test(dataProvider = "values") + public void testIsAmbiguous(final Nucleotide nuc) { + switch (nuc) { + case X: + case A: + case C: + case T: + case G: + Assert.assertFalse(nuc.isAmbiguous()); + break; + default: + Assert.assertTrue(nuc.isAmbiguous()); + } + } + + @Test(dataProvider = "values") + public void testIsValid(final Nucleotide nuc) { + switch (nuc) { + case X: + Assert.assertFalse(nuc.isValid()); + break; + default: + Assert.assertTrue(nuc.isValid()); + } } @Test - public void testValueOfBase() { + public void testDecode() { for (byte i = 0; i >= 0; i++) { final 
Nucleotide expected; switch (i) { case 'a': - case 'A': expected = Nucleotide.A; break; + case 'A': + expected = Nucleotide.A; + break; case 'c': - case 'C': expected = Nucleotide.C; break; + case 'C': + expected = Nucleotide.C; + break; case 'g': - case 'G': expected = Nucleotide.G; break; + case 'G': + expected = Nucleotide.G; + break; case 't': case 'T': case 'u': - case 'U': expected = Nucleotide.T; break; + case 'U': + expected = Nucleotide.T; + break; case 'n': - case 'N': expected = Nucleotide.N; break; + case 'N': + expected = Nucleotide.N; + break; case 'x': - case 'X': expected = Nucleotide.X; break; - default : expected = Nucleotide.INVALID; + case 'X': + expected = Nucleotide.X; + break; + case 'r': + case 'R': + expected = Nucleotide.R; + break; + case 'b': + case 'B': + expected = Nucleotide.B; + break; + case 'v': + case 'V': + expected = Nucleotide.V; + break; + case 'y': + case 'Y': + expected = Nucleotide.Y; + break; + case 's': + case 'S': + expected = Nucleotide.S; + break; + case 'w': + case 'W': + expected = Nucleotide.W; + break; + case 'k': + case 'K': + expected = Nucleotide.K; + break; + case 'm': + case 'M': + expected = Nucleotide.M; + break; + case 'd': + case 'D': + expected = Nucleotide.D; + break; + case 'h': + case 'H': + expected = Nucleotide.H; + break; + default: + expected = Nucleotide.X; + } + Assert.assertSame(Nucleotide.decode(i), expected, "Failed with base " + i + " returning nucleotide " + Nucleotide.decode(i)); + Assert.assertSame(Nucleotide.decode((char)i), expected, "Failed with base " + i + " returning nucleotide " + Nucleotide.decode((char)i)); + } + } + + @Test(dataProvider = "values") + public void testIncludes(final Nucleotide nuc) { + if (nuc.isStandard()) { + for (final Nucleotide other : Nucleotide.values()) { + if (other.isStandard()) { + Assert.assertEquals(nuc.includes(other), nuc == other); + Assert.assertEquals(nuc.includes(other.encodeAsByte()), nuc == other); + 
Assert.assertEquals(nuc.includes(other.encodeAsByte(false)), nuc == other); + } else { + Assert.assertFalse(nuc.includes(other)); + Assert.assertFalse(nuc.includes(other.encodeAsByte())); + Assert.assertFalse(nuc.includes(other.encodeAsByte(false))); + } + } + } else if (nuc.isAmbiguous()) { + for (final Nucleotide other : Nucleotide.values()) { + final boolean thisA = nuc.includes(Nucleotide.A); + final boolean thisC = nuc.includes(Nucleotide.C); + final boolean thisG = nuc.includes(Nucleotide.G); + final boolean thisT = nuc.includes(Nucleotide.T); + final boolean otherA = other.includes(Nucleotide.A); + final boolean otherC = other.includes(Nucleotide.C); + final boolean otherG = other.includes(Nucleotide.G); + final boolean otherT = other.includes(Nucleotide.T); + final boolean includes = other.isValid() && (thisA == otherA || thisA) + && (thisC == otherC || thisC) + && (thisG == otherG || thisG) + && (thisT == otherT || thisT); + Assert.assertEquals(nuc.includes(other), includes, "" + nuc + " " + other); + Assert.assertEquals(nuc.includes(other.encodeAsByte()), includes); + Assert.assertEquals(nuc.includes(other.encodeAsByte(false)), includes); + } + } else { // invalid + for (final Nucleotide other : Nucleotide.values()) { + Assert.assertFalse(nuc.includes(other)); + Assert.assertFalse(nuc.includes(other.encodeAsByte())); + Assert.assertFalse(nuc.includes(other.encodeAsByte(false))); } - Assert.assertSame(Nucleotide.valueOf(i), expected, "Failed with base " + i + " returning nucleotide " + Nucleotide.valueOf(i)); + } + + } + + @Test(dataProvider = "values") + public void testIntersects(final Nucleotide nuc) { + final boolean thisA = nuc.includes(Nucleotide.A); + final boolean thisC = nuc.includes(Nucleotide.C); + final boolean thisG = nuc.includes(Nucleotide.G); + final boolean thisT = nuc.includes(Nucleotide.T); + for (final Nucleotide other : Nucleotide.values()) { + final boolean otherA = other.includes(Nucleotide.A); + final boolean otherC = 
other.includes(Nucleotide.C); + final boolean otherG = other.includes(Nucleotide.G); + final boolean otherT = other.includes(Nucleotide.T); + final Nucleotide intersect = nuc.intersect(other); + Assert.assertNotNull(intersect); + Assert.assertEquals(intersect.includes(Nucleotide.A), thisA && otherA); + Assert.assertEquals(intersect.includes(Nucleotide.C), thisC && otherC); + Assert.assertEquals(intersect.includes(Nucleotide.G), thisG && otherG); + Assert.assertEquals(intersect.includes(Nucleotide.T), thisT && otherT); + } + } + + @Test(dataProvider = "values") + public void testComplement(final Nucleotide nuc) { + final boolean thisA = nuc.includes(Nucleotide.A); + final boolean thisC = nuc.includes(Nucleotide.C); + final boolean thisG = nuc.includes(Nucleotide.G); + final boolean thisT = nuc.includes(Nucleotide.T); + final Nucleotide complement = nuc.complement(); + final boolean compA = complement.includes(Nucleotide.A); + final boolean compC = complement.includes(Nucleotide.C); + final boolean compG = complement.includes(Nucleotide.G); + final boolean compT = complement.includes(Nucleotide.T); + final String errorMessage = "Failure with " + nuc + " result in complement " + complement; + Assert.assertEquals(compA, thisT, errorMessage); + Assert.assertEquals(compT, thisA, errorMessage); + Assert.assertEquals(compC, thisG, errorMessage); + Assert.assertEquals(compG, thisC, errorMessage); + } + + @Test(dataProvider = "values") + public void testTransition(final Nucleotide nuc) { + final boolean thisA = nuc.includes(Nucleotide.A); + final boolean thisC = nuc.includes(Nucleotide.C); + final boolean thisG = nuc.includes(Nucleotide.G); + final boolean thisT = nuc.includes(Nucleotide.T); + final Nucleotide trans = nuc.transition(); + final boolean tranA = trans.includes(Nucleotide.A); + final boolean tranC = trans.includes(Nucleotide.C); + final boolean tranG = trans.includes(Nucleotide.G); + final boolean tranT = trans.includes(Nucleotide.T); + final String errorMessage 
= "Failure with " + nuc + " result in transition " + trans; + Assert.assertEquals(tranA, thisG, errorMessage); + Assert.assertEquals(tranG, thisA, errorMessage); + Assert.assertEquals(tranC, thisT, errorMessage); + Assert.assertEquals(tranT, thisC, errorMessage); + } + + @Test(dataProvider = "values") + public void testTransversion(final Nucleotide nuc) { + final boolean thisA = nuc.includes(Nucleotide.A); + final boolean thisC = nuc.includes(Nucleotide.C); + final boolean thisG = nuc.includes(Nucleotide.G); + final boolean thisT = nuc.includes(Nucleotide.T); + final Nucleotide trans = nuc.transversion(); + final boolean tranA = trans.includes(Nucleotide.A); + final boolean tranC = trans.includes(Nucleotide.C); + final boolean tranG = trans.includes(Nucleotide.G); + final boolean tranT = trans.includes(Nucleotide.T); + final String errorMessage = "Failure with " + nuc + " result in transversion " + trans; + Assert.assertEquals(tranA, thisC || thisT, errorMessage); + Assert.assertEquals(tranG, thisC || thisT, errorMessage); + Assert.assertEquals(tranC, thisA || thisG, errorMessage); + Assert.assertEquals(tranT, thisA || thisG, errorMessage); + final Nucleotide transStrong = nuc.transversion(true); + final Nucleotide transWeak = nuc.transversion(false); + Assert.assertTrue(trans.includes(transStrong) || trans == Nucleotide.X || transStrong == Nucleotide.X); + Assert.assertTrue(trans.includes(transWeak) || trans == Nucleotide.X || transStrong == Nucleotide.X); + Assert.assertSame(transStrong.intersect(transWeak), Nucleotide.X); + Assert.assertEquals(transStrong.includes(Nucleotide.C), tranC); + Assert.assertEquals(transStrong.includes(Nucleotide.G), tranG); + Assert.assertEquals(transWeak.includes(Nucleotide.A), tranA); + Assert.assertEquals(transWeak.includes(Nucleotide.T), tranT); + } + + @Test(dataProvider = "values") + public void testSame(final Nucleotide nuc) { + for (final Nucleotide other : Nucleotide.values()) { + final boolean reallyTheSame = nuc != 
Nucleotide.INVALID && nuc == other; + Assert.assertEquals(nuc.same(other), reallyTheSame); + Assert.assertEquals(Nucleotide.same(nuc.encodeAsByte(), other.encodeAsByte()), reallyTheSame); + Assert.assertEquals(Nucleotide.same(nuc.encodeAsByte(false), other.encodeAsByte()), reallyTheSame); + Assert.assertEquals(Nucleotide.same(nuc.encodeAsByte(), other.encodeAsByte(false)), reallyTheSame); + Assert.assertEquals(Nucleotide.same(nuc.encodeAsByte(false), other.encodeAsByte(false)), reallyTheSame); } } @Test(expectedExceptions = IllegalArgumentException.class) public void testValueOfNegativeBase() { - Nucleotide.valueOf((byte) -10); + Nucleotide.decode((byte) -10); } @Test @@ -88,13 +299,13 @@ public void testNucleotideCounterInit() { } } - @Test(dependsOnMethods = "testValueOfBase", dataProvider = "testSequences") + @Test(dependsOnMethods = "testDecode", dataProvider = "testSequences") public void testAddingOneByOne(final byte[] bases) { final Nucleotide.Counter subject = new Nucleotide.Counter(); final Map shadow = new HashMap<>(Nucleotide.values().length); for (final byte base : bases) { subject.add(base); - final Nucleotide nuc = Nucleotide.valueOf(base); + final Nucleotide nuc = Nucleotide.decode(base); shadow.put(nuc, shadow.getOrDefault(nuc, 0) + 1); for (final Nucleotide n : Nucleotide.values()) { Assert.assertEquals(subject.get(n), (long) shadow.getOrDefault(n, 0)); @@ -103,12 +314,12 @@ public void testAddingOneByOne(final byte[] bases) { Assert.assertEquals(subject.sum(), shadow.values().stream().mapToLong(l -> l).sum()); } - @Test(dependsOnMethods = "testValueOfBase", dataProvider = "testSequences") + @Test(dependsOnMethods = "testDecode", dataProvider = "testSequences") public void testAddingAllAtOnce(final byte[] bases) { final Nucleotide.Counter subject = new Nucleotide.Counter(); final Map shadow = new HashMap<>(Nucleotide.values().length); for (final byte base : bases) { - final Nucleotide nuc = Nucleotide.valueOf(base); + final Nucleotide nuc = 
Nucleotide.decode(base); shadow.put(nuc, shadow.getOrDefault(nuc, 0) + 1); } subject.addAll(bases); @@ -121,24 +332,21 @@ public void testAddingAllAtOnce(final byte[] bases) { @Test(expectedExceptions = IllegalArgumentException.class) public void testAddingAllAtOnceOnANullArray() { final Nucleotide.Counter subject = new Nucleotide.Counter(); - subject.addAll(null); + subject.addAll((byte[]) null); } - @Test(expectedExceptions = IllegalArgumentException.class) public void testAddingAllAtOnceWithNegativeBases() { final Nucleotide.Counter subject = new Nucleotide.Counter(); - subject.addAll(new byte[] { 'a', 'A', -10, 'C' } ); + subject.addAll(new byte[]{'a', 'A', -10, 'C'}); } - - - @Test(dependsOnMethods = "testValueOfBase", dataProvider = "testSequences") + @Test(dependsOnMethods = "testDecode", dataProvider = "testSequences") public void testClear(final byte[] bases) { final Nucleotide.Counter subject = new Nucleotide.Counter(); final Map shadow = new HashMap<>(Nucleotide.values().length); for (final byte base : bases) { - final Nucleotide nuc = Nucleotide.valueOf(base); + final Nucleotide nuc = Nucleotide.decode(base); shadow.put(nuc, shadow.getOrDefault(nuc, 0) + 1); } subject.addAll(bases); @@ -152,22 +360,87 @@ public void testClear(final byte[] bases) { Assert.assertEquals(subject.sum(), 0); } + @Test + public void testUracilSameAsThymine() { + Assert.assertSame(Nucleotide.U, Nucleotide.T); + } + + @Test + public void testLongFormNames() { + Assert.assertSame(Nucleotide.ADENINE, Nucleotide.A); + Assert.assertSame(Nucleotide.THYMINE, Nucleotide.T); + Assert.assertSame(Nucleotide.GUANINE, Nucleotide.G); + Assert.assertSame(Nucleotide.CYTOSINE, Nucleotide.C); + Assert.assertSame(Nucleotide.URACIL, Nucleotide.U); + Assert.assertSame(Nucleotide.ANY, Nucleotide.N); + Assert.assertSame(Nucleotide.UNKNOWN, Nucleotide.N); + Assert.assertSame(Nucleotide.PURINE, Nucleotide.R); + Assert.assertSame(Nucleotide.PYRIMIDINE, Nucleotide.Y); + 
Assert.assertSame(Nucleotide.INVALID, Nucleotide.X); + Assert.assertSame(Nucleotide.STRONG, Nucleotide.S); + Assert.assertSame(Nucleotide.WEAK, Nucleotide.W); + Assert.assertSame(Nucleotide.KETO, Nucleotide.K); + Assert.assertSame(Nucleotide.AMINO, Nucleotide.M); + } + + @Test + public void testLongFormNamesForTypos() { + // We avoid a direct comparison of the constant value (rather than indirectly using its name) + // because that would typos and here the names matter and are unlikely to change even in the long term. + Assert.assertSame(constantNameToInstance("ADENINE"), Nucleotide.A); + Assert.assertSame(constantNameToInstance("THYMINE"), Nucleotide.T); + Assert.assertSame(constantNameToInstance("GUANINE"), Nucleotide.G); + Assert.assertSame(constantNameToInstance("CYTOSINE"), Nucleotide.C); + Assert.assertSame(constantNameToInstance("URACIL"), Nucleotide.U); + Assert.assertSame(constantNameToInstance("ANY"), Nucleotide.N); + Assert.assertSame(constantNameToInstance("UNKNOWN"), Nucleotide.N); + Assert.assertSame(constantNameToInstance("PURINE"), Nucleotide.R); + Assert.assertSame(constantNameToInstance("PYRIMIDINE"), Nucleotide.Y); + Assert.assertSame(constantNameToInstance("INVALID"), Nucleotide.X); + Assert.assertSame(constantNameToInstance("STRONG"), Nucleotide.S); + Assert.assertSame(constantNameToInstance("WEAK"), Nucleotide.W); + Assert.assertSame(constantNameToInstance("KETO"), Nucleotide.K); + Assert.assertSame(constantNameToInstance("AMINO"), Nucleotide.M); + } + + private Nucleotide constantNameToInstance(final String name) { + try { + return (Nucleotide) Nucleotide.class.getField(name).get(null); + } catch (final IllegalAccessException e) { + Assert.fail("Long name constant " + name + " is not accessible"); + } catch (final NoSuchFieldException e) { + Assert.fail("Long name constant " + name + " does not exists"); + } catch (final ClassCastException e) { + Assert.fail("Long name constant " + name + " so not typed as " + Nucleotide.class.getName()); + } 
+ throw new IllegalStateException("unreachable code"); + } + @DataProvider(name = "testSequences") public Object[][] testSequences() { final List