Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: raise exception if CollectDuplexSeqMetrics run on consensus BAM #1003

Merged
merged 13 commits into from
Jul 31, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,20 @@ class CollectDuplexSeqMetrics
override def execute(): Unit = {
// Build the iterator we'll use based on whether or not we're restricting to a set of intervals
val in = SamSource(input)
val _filteredIterator = in.iterator.filter(r => r.paired && r.mapped && r.mateMapped && r.firstOfPair && !r.secondary && !r.supplementary)
val _filteredIterator = in
.iterator
.filter(r => r.paired && r.mapped && r.mateMapped && r.firstOfPair && !r.secondary && !r.supplementary)
.bufferBetter
// Ensure the records are not consensus records
_filteredIterator.headOption.foreach {
rec =>
val exceptionString = s"Input BAM file to CollectDuplexSeqMetrics ($input) appears to contain consensus sequences. " +
"CollectDuplexSeqMetrics cannot run on consensus BAMs, and instead requires the UMI-grouped BAM generated " +
"by GroupReadsByUmi which is run prior to consensus calling." +
"\nFirst record in $input has consensus SAM tags present:\n$rec"

if (Umis.isFgbioStyleConsensus(rec)) throw new IllegalArgumentException(exceptionString)
}
val iterator = intervals match {
case None => _filteredIterator
case Some(path) =>
Expand Down
12 changes: 12 additions & 0 deletions src/main/scala/com/fulcrumgenomics/umi/Umis.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
package com.fulcrumgenomics.umi

import com.fulcrumgenomics.bam.api.SamRecord
import com.fulcrumgenomics.umi.ConsensusTags
import com.fulcrumgenomics.util.Sequences

object Umis {
Expand Down Expand Up @@ -127,4 +128,15 @@ object Umis {
@inline private def isValidUmiCharacter(ch: Char): Boolean = {
ch == 'A' || ch == 'C' || ch == 'G' || ch == 'T' || ch == 'N' || ch == '-'
}

/** Returns True if the record appears to be a consensus read,
* typically produced by fgbio's CallMolecularConsensusReads or CallDuplexConsensusReads.
*
* @param rec the record to check
* @return boolean indicating if the record is a consensus or not
*/
def isFgbioStyleConsensus(rec: SamRecord): Boolean = {
rec.contains(ConsensusTags.PerRead.RawReadCount) ||
(rec.contains(ConsensusTags.PerRead.AbRawReadCount) && rec.contains(ConsensusTags.PerRead.BaRawReadCount))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,22 @@ class CollectDuplexSeqMetricsTest extends UnitSpec {
duplexFamilies.find(f => f.ab_size == 6 && f.ba_size == 0).get.count shouldBe 1
}

it should "raise an exception if duplex consensus records are provided" in {
val builder = new SamBuilder(readLength=100, sort=Some(SamOrder.TemplateCoordinate))
builder.addPair(
contig=1,
start1=1000,
start2=1100,
attrs=Map(
RX -> "AAA-GGG",
MI -> "1/A",
ConsensusTags.PerRead.AbRawReadCount -> 10,
ConsensusTags.PerRead.BaRawReadCount -> 10
)
)
an[IllegalArgumentException] shouldBe thrownBy { exec(builder) }
}

"CollectDuplexSeqMetrics.updateUmiMetrics" should "not count duplex umis" in collector(duplex=false).foreach { c =>
val builder = new SamBuilder(readLength=10)
builder.addPair(start1=100, start2=200, attrs=Map(RX -> "AAA-CCC", MI -> "1/A"))
Expand Down
18 changes: 17 additions & 1 deletion src/test/scala/com/fulcrumgenomics/umi/UmisTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

package com.fulcrumgenomics.umi

import com.fulcrumgenomics.bam.api.SamRecord
import com.fulcrumgenomics.bam.api.{SamOrder, SamRecord}
import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec}
import org.scalatest.OptionValues

Expand Down Expand Up @@ -111,4 +111,20 @@ class UmisTest extends UnitSpec with OptionValues {
an[Exception] should be thrownBy copyUmiFromReadName(rec=rec("NAME:ACGT-CCKC"))
an[Exception] should be thrownBy copyUmiFromReadName(rec=rec("NAME:CCKC-ACGT"))
}

"Umis.isConsensusRead" should "return false for reads without consensus tags" in {
val builder = new SamBuilder(sort=Some(SamOrder.Coordinate), readLength=10, baseQuality=20)
builder.addFrag(start=100).exists(Umis.isFgbioStyleConsensus) shouldBe false
builder.addPair(start1=100, start2=100, unmapped2=true).exists(Umis.isFgbioStyleConsensus) shouldBe false
}

it should "return true for reads with consensus tags" in {
val builder = new SamBuilder(sort=Some(SamOrder.Coordinate), readLength=10, baseQuality=20)
builder.addFrag(start=10, attrs=Map(ConsensusTags.PerRead.RawReadCount -> 10))
.exists(Umis.isFgbioStyleConsensus) shouldBe true
builder.addFrag(
start=10,
attrs=Map(ConsensusTags.PerRead.AbRawReadCount -> 10, ConsensusTags.PerRead.BaRawReadCount -> 10)
).exists(Umis.isFgbioStyleConsensus) shouldBe true
}
}
Loading