diff --git a/src/main/scala/com/fulcrumgenomics/vcf/DownsampleVcf.scala b/src/main/scala/com/fulcrumgenomics/vcf/DownsampleVcf.scala index b02c0b24e..c47e6b5c6 100644 --- a/src/main/scala/com/fulcrumgenomics/vcf/DownsampleVcf.scala +++ b/src/main/scala/com/fulcrumgenomics/vcf/DownsampleVcf.scala @@ -104,17 +104,17 @@ object DownsampleVcf extends LazyLogging { gt.copy(attrs=Map("PL" -> pls, "AD" -> newAds, "DP" -> newAds.sum), calls=calls) } - /**Converts a sequence of log-likelihoods to phred-scale by 1) multiplying each by -10, 2) - * subtracting from each the min value so the smallest value is 0, and 3) rounding to the - * nearest integer. - */ - def logToPhredLikelihoods(logLikelihoods: IndexedSeq[Double]): IndexedSeq[Int] = { - val rawPL = logLikelihoods.map(gl => gl * -10) - val minPL = rawPL.min - rawPL.map(pl => (pl - minPL).round.toInt) - } - object Likelihoods { + /**Converts a sequence of log-likelihoods to phred-scale by 1) multiplying each by -10, 2) + * subtracting from each the min value so the smallest value is 0, and 3) rounding to the + * nearest integer. + */ + def logToPhredLikelihoods(logLikelihoods: IndexedSeq[Double]): IndexedSeq[Int] = { + val rawPL = logLikelihoods.map(gl => gl * -10) + val minPL = rawPL.min + rawPL.map(pl => (pl - minPL).round.toInt) + } + /** Computes the likelihoods for each possible biallelic genotype. * @param alleleDepthA the reference allele depth * @param alleleDepthB the alternate allele depth @@ -143,7 +143,7 @@ object DownsampleVcf extends LazyLogging { def generalized(alleleDepths: IndexedSeq[Int], epsilon: Double = 0.01): Likelihoods = { val numAlleles = alleleDepths.length // probabilities associated with each possible genotype for a pair of alleles - val probs: Array[Double] = Array( + val logProbs: Array[Double] = Array( math.log10(epsilon), math.log10((1 - epsilon) / 2), math.log10(1 - epsilon) @@ -154,7 +154,7 @@ object DownsampleVcf extends LazyLogging { (0 until numAlleles).flatMap(b => (0 to b).map(a => (0 until numAlleles).map(allele => - probs(Array(a, b).count(_ == allele)) * alleleDepths(allele) + logProbs(Array(a, b).count(_ == allele)) * alleleDepths(allele) ).sum ) ) @@ -177,7 +177,7 @@ object DownsampleVcf extends LazyLogging { * @return a list of phred-scaled likelihooodS for AA, AB, BB. */ def pls: IndexedSeq[Int] = { - logToPhredLikelihoods(genotypeLikelihoods) + Likelihoods.logToPhredLikelihoods(genotypeLikelihoods) } def mostLikelyGenotype: Option[(Int, Int)] = { diff --git a/src/test/scala/com/fulcrumgenomics/vcf/DownsampleVcfTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/DownsampleVcfTest.scala index 1ea36acf1..283843dc0 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/DownsampleVcfTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/DownsampleVcfTest.scala @@ -215,7 +215,22 @@ class DownsampleVcfTest extends UnitSpec { val likelihood = Likelihoods(input, e) val logOutput = output.map(p => math.log10(p)) likelihood.pls.length shouldBe logOutput.length - likelihood.pls should contain theSameElementsInOrderAs DownsampleVcf.logToPhredLikelihoods(logOutput) + likelihood.pls should contain theSameElementsInOrderAs DownsampleVcf.Likelihoods.logToPhredLikelihoods(logOutput) + } + } + + it should "return the same results for biallelic and generalized algorithm" in { + val e = 0.01 + val cases: IndexedSeq[(IndexedSeq[Int], IndexedSeq[Double])] = IndexedSeq( + (IndexedSeq(1, 0), IndexedSeq(1 - e, 0.5, e)), + (IndexedSeq(0, 1), IndexedSeq(e, 0.5, 1 - e)), + (IndexedSeq(1, 1), IndexedSeq((1 - e) * e, 0.25, (1 - e) * e)), + (IndexedSeq(2, 0), IndexedSeq(math.pow((1 - e), 2), 0.25, math.pow(e, 2))), + ) + cases.foreach { case (input, output) => + val biallelic = DownsampleVcf.Likelihoods.biallelic(input(0), input(1), e) + val generalized = DownsampleVcf.Likelihoods.generalized(input, e) + biallelic.pls should contain theSameElementsInOrderAs generalized.pls } } @@ -239,17 +254,17 @@ class DownsampleVcfTest extends UnitSpec { it should "return a likelihood of 0 for AA if the AD A >> AD B" in { val likelihood = Likelihoods(IndexedSeq(15, 2)) - likelihood.pls(0) == 0 + assert(likelihood.pls(0) == 0) } it should "return a likelihood of 0 for AB if AD.A and AD.B are similar but not equal" in { val likelihood = Likelihoods(IndexedSeq(15, 17)) - likelihood.pls(1) == 0 + assert(likelihood.pls(1) == 0) } it should "return a likelihood of 0 for BB if AD.B >> AD.A but neither are 0" in { val likelihood = Likelihoods(IndexedSeq(3, 30)) - likelihood.pls(2) == 0 + assert(likelihood.pls(2) == 0) } it should "return correct values when there are very few reads" in {