Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: raise exception if CollectDuplexSeqMetrics run on consensus BAM #1003

Merged
merged 13 commits into from
Jul 31, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,20 @@ class CollectDuplexSeqMetrics
override def execute(): Unit = {
// Build the iterator we'll use based on whether or not we're restricting to a set of intervals
val in = SamSource(input)
val _filteredIterator = in.iterator.filter(r => r.paired && r.mapped && r.mateMapped && r.firstOfPair && !r.secondary && !r.supplementary)
val _filteredIterator = in
.iterator
.filter(r => r.paired && r.mapped && r.mateMapped && r.firstOfPair && !r.secondary && !r.supplementary)
.bufferBetter
// Ensure the records are not consensus records
_filteredIterator.headOption.foreach {
rec =>
val exceptionString = s"Input BAM file to CollectDuplexSeqMetrics ($input) appears to contain consensus sequences. " +
"CollectDuplexSeqMetrics cannot run on consensus BAMs, and instead requires the UMI-grouped BAM generated " +
"prior to consensus calling. The UMI-grouped BAM is the output of running GroupReadsByUmi." +
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
s"\nFirst record in $input has consensus SAM tags present:\n$rec"

if (Umis.isFgbioStyleConsensus(rec)) throw new IllegalArgumentException(exceptionString)
}
val iterator = intervals match {
case None => _filteredIterator
case Some(path) =>
Expand Down
11 changes: 11 additions & 0 deletions src/main/scala/com/fulcrumgenomics/umi/Umis.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
package com.fulcrumgenomics.umi

import com.fulcrumgenomics.bam.api.SamRecord
import com.fulcrumgenomics.umi.ConsensusTags
import com.fulcrumgenomics.util.Sequences

object Umis {
Expand Down Expand Up @@ -127,4 +128,14 @@ object Umis {
@inline private def isValidUmiCharacter(ch: Char): Boolean = {
ch == 'A' || ch == 'C' || ch == 'G' || ch == 'T' || ch == 'N' || ch == '-'
}

/** Tests if a record is a consensus or not
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
*
* @param rec the record to test
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
* @return boolean indicating if the record is a consensus or not
*/
def isFgbioStyleConsensus(rec: SamRecord): Boolean = {
rec.contains(ConsensusTags.PerRead.RawReadCount) ||
(rec.contains(ConsensusTags.PerRead.AbRawReadCount) && rec.contains(ConsensusTags.PerRead.BaRawReadCount))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,22 @@ class CollectDuplexSeqMetricsTest extends UnitSpec {
duplexFamilies.find(f => f.ab_size == 6 && f.ba_size == 0).get.count shouldBe 1
}

it should "raise an exception if duplex consensus records are provided" in {
val builder = new SamBuilder(readLength=100, sort=Some(SamOrder.TemplateCoordinate))
builder.addPair(
contig=1,
start1=1000,
start2=1100,
attrs=Map(
RX -> "AAA-GGG",
MI -> "1/A",
ConsensusTags.PerRead.AbRawReadCount -> 10,
ConsensusTags.PerRead.BaRawReadCount -> 10
)
)
an[IllegalArgumentException] shouldBe thrownBy { exec(builder) }
}

"CollectDuplexSeqMetrics.updateUmiMetrics" should "not count duplex umis" in collector(duplex=false).foreach { c =>
val builder = new SamBuilder(readLength=10)
builder.addPair(start1=100, start2=200, attrs=Map(RX -> "AAA-CCC", MI -> "1/A"))
Expand Down
18 changes: 17 additions & 1 deletion src/test/scala/com/fulcrumgenomics/umi/UmisTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

package com.fulcrumgenomics.umi

import com.fulcrumgenomics.bam.api.SamRecord
import com.fulcrumgenomics.bam.api.{SamOrder, SamRecord}
import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec}
import org.scalatest.OptionValues

Expand Down Expand Up @@ -111,4 +111,20 @@ class UmisTest extends UnitSpec with OptionValues {
an[Exception] should be thrownBy copyUmiFromReadName(rec=rec("NAME:ACGT-CCKC"))
an[Exception] should be thrownBy copyUmiFromReadName(rec=rec("NAME:CCKC-ACGT"))
}

"Umis.isConsensusRead" should "return false for reads without consensus tags" in {
val builder = new SamBuilder(sort=Some(SamOrder.Coordinate), readLength=10, baseQuality=20)
builder.addFrag(start=100).exists(Umis.isFgbioStyleConsensus) shouldBe false
builder.addPair(start1=100, start2=100, unmapped2=true).exists(Umis.isFgbioStyleConsensus) shouldBe false
}

it should "return true for reads with consensus tags" in {
val builder = new SamBuilder(sort=Some(SamOrder.Coordinate), readLength=10, baseQuality=20)
builder.addFrag(start=10, attrs=Map(ConsensusTags.PerRead.RawReadCount -> 10))
.exists(Umis.isFgbioStyleConsensus) shouldBe true
builder.addFrag(
start=10,
attrs=Map(ConsensusTags.PerRead.AbRawReadCount -> 10, ConsensusTags.PerRead.BaRawReadCount -> 10)
).exists(Umis.isFgbioStyleConsensus) shouldBe true
}
}
Loading