Skip to content

Commit

Permalink
add more tests, fix some tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jdidion committed Apr 4, 2024
1 parent 81e39d1 commit f57f1e8
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 17 deletions.
26 changes: 13 additions & 13 deletions src/main/scala/com/fulcrumgenomics/vcf/DownsampleVcf.scala
Original file line number Diff line number Diff line change
Expand Up @@ -104,17 +104,17 @@ object DownsampleVcf extends LazyLogging {
gt.copy(attrs=Map("PL" -> pls, "AD" -> newAds, "DP" -> newAds.sum), calls=calls)
}

/**Converts a sequence of log-likelihoods to phred-scale by 1) multiplying each by -10, 2)
* subtracting from each the min value so the smallest value is 0, and 3) rounding to the
* nearest integer.
*/
def logToPhredLikelihoods(logLikelihoods: IndexedSeq[Double]): IndexedSeq[Int] = {
val rawPL = logLikelihoods.map(gl => gl * -10)
val minPL = rawPL.min
rawPL.map(pl => (pl - minPL).round.toInt)
}

object Likelihoods {
/**Converts a sequence of log-likelihoods to phred-scale by 1) multiplying each by -10, 2)
* subtracting from each the min value so the smallest value is 0, and 3) rounding to the
* nearest integer.
*/
def logToPhredLikelihoods(logLikelihoods: IndexedSeq[Double]): IndexedSeq[Int] = {
val rawPL = logLikelihoods.map(gl => gl * -10)
val minPL = rawPL.min
rawPL.map(pl => (pl - minPL).round.toInt)
}

/** Computes the likelihoods for each possible biallelic genotype.
* @param alleleDepthA the reference allele depth
* @param alleleDepthB the alternate allele depth
Expand Down Expand Up @@ -143,7 +143,7 @@ object DownsampleVcf extends LazyLogging {
def generalized(alleleDepths: IndexedSeq[Int], epsilon: Double = 0.01): Likelihoods = {
val numAlleles = alleleDepths.length
// probabilities associated with each possible genotype for a pair of alleles
val probs: Array[Double] = Array(
val logProbs: Array[Double] = Array(
math.log10(epsilon),
math.log10((1 - epsilon) / 2),
math.log10(1 - epsilon)
Expand All @@ -154,7 +154,7 @@ object DownsampleVcf extends LazyLogging {
(0 until numAlleles).flatMap(b =>
(0 to b).map(a =>
(0 until numAlleles).map(allele =>
probs(Array(a, b).count(_ == allele)) * alleleDepths(allele)
logProbs(Array(a, b).count(_ == allele)) * alleleDepths(allele)
).sum
)
)
Expand All @@ -177,7 +177,7 @@ object DownsampleVcf extends LazyLogging {
* @return a list of phred-scaled likelihooodS for AA, AB, BB.
*/
def pls: IndexedSeq[Int] = {
logToPhredLikelihoods(genotypeLikelihoods)
Likelihoods.logToPhredLikelihoods(genotypeLikelihoods)
}

def mostLikelyGenotype: Option[(Int, Int)] = {
Expand Down
23 changes: 19 additions & 4 deletions src/test/scala/com/fulcrumgenomics/vcf/DownsampleVcfTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,22 @@ class DownsampleVcfTest extends UnitSpec {
val likelihood = Likelihoods(input, e)
val logOutput = output.map(p => math.log10(p))
likelihood.pls.length shouldBe logOutput.length
likelihood.pls should contain theSameElementsInOrderAs DownsampleVcf.logToPhredLikelihoods(logOutput)
likelihood.pls should contain theSameElementsInOrderAs DownsampleVcf.Likelihoods.logToPhredLikelihoods(logOutput)
}
}

it should "return the same results for biallelic and generalized algorithm" in {
val e = 0.01
val cases: IndexedSeq[(IndexedSeq[Int], IndexedSeq[Double])] = IndexedSeq(
(IndexedSeq(1, 0), IndexedSeq(1 - e, 0.5, e)),
(IndexedSeq(0, 1), IndexedSeq(e, 0.5, 1 - e)),
(IndexedSeq(1, 1), IndexedSeq((1 - e) * e, 0.25, (1 - e) * e)),
(IndexedSeq(2, 0), IndexedSeq(math.pow((1 - e), 2), 0.25, math.pow(e, 2))),
)
cases.foreach { case (input, output) =>
val biallelic = DownsampleVcf.Likelihoods.biallelic(input(0), input(1), e)
val generalized = DownsampleVcf.Likelihoods.generalized(input, e)
biallelic.pls should contain theSameElementsInOrderAs generalized.pls
}
}

Expand All @@ -239,17 +254,17 @@ class DownsampleVcfTest extends UnitSpec {

it should "return a likelihood of 0 for AA if the AD A >> AD B" in {
val likelihood = Likelihoods(IndexedSeq(15, 2))
likelihood.pls(0) == 0
assert(likelihood.pls(0) == 0)
}

it should "return a likelihood of 0 for AB if AD.A and AD.B are similar but not equal" in {
val likelihood = Likelihoods(IndexedSeq(15, 17))
likelihood.pls(1) == 0
assert(likelihood.pls(1) == 0)
}

it should "return a likelihood of 0 for BB if AD.B >> AD.A but neither are 0" in {
val likelihood = Likelihoods(IndexedSeq(3, 30))
likelihood.pls(2) == 0
assert(likelihood.pls(2) == 0)
}

it should "return correct values when there are very few reads" in {
Expand Down

0 comments on commit f57f1e8

Please sign in to comment.