Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: raise exception if CollectDuplexSeqMetrics run on consensus BAM #1003

Merged
merged 13 commits into from
Jul 31, 2024
5 changes: 5 additions & 0 deletions src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ package com.fulcrumgenomics.bam.api

import com.fulcrumgenomics.FgBioDef._
import com.fulcrumgenomics.alignment.Cigar
import com.fulcrumgenomics.umi.ConsensusTags.PerRead.AllPerReadTags
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
import htsjdk.samtools
import htsjdk.samtools.SamPairUtil.PairOrientation
import htsjdk.samtools._
Expand Down Expand Up @@ -269,6 +270,10 @@ trait SamRecord {
SamPairUtil.getPairOrientation(this) == PairOrientation.FR
}

def isConsensus: Boolean = {
AllPerReadTags.exists(this.contains)
}

clintval marked this conversation as resolved.
Show resolved Hide resolved
/** Clone method that does a "reasonably deep" clone. The bases and quals are cloned as is the attributes map,
* though not the values in the attributes map. */
override def clone(): SamRecord = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ class CollectDuplexSeqMetrics

// Assign reads a random number between 0 and 1 inclusive based on their read name
group.foreach { rec =>
if (rec.isConsensus) throw new IllegalArgumentException("Found consensus record. Expected UMI-grouped BAM")
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
val intHash = math.abs(this.hasher.hashUnencodedChars(rec.name))
val doubleHash = intHash / MaxIntAsDouble
rec.transientAttrs(HashKey) = doubleHash
Expand Down
14 changes: 14 additions & 0 deletions src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ package com.fulcrumgenomics.bam.api
import com.fulcrumgenomics.alignment.Cigar
import com.fulcrumgenomics.testing.SamBuilder.{Minus, Plus}
import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec}
import com.fulcrumgenomics.umi.ConsensusTags.PerRead.AllPerReadTags
import htsjdk.samtools.TextCigarCodec
import org.scalatest.OptionValues

Expand Down Expand Up @@ -289,4 +290,17 @@ class SamRecordTest extends UnitSpec with OptionValues {
rec1.matesOverlap shouldBe None // Mate's start is not enclosed by rec, and mate's end cannot be determined
rec2.matesOverlap.value shouldBe true // Mate's start is enclosed by rec, regardless of where mate end is
}

"SamRecord.isConsensus" should "return false for reads without consensus tags" in {
val builder = new SamBuilder(sort=Some(SamOrder.Coordinate), readLength=10, baseQuality=20)
builder.addFrag(start=100).exists(_.isConsensus) shouldBe false
builder.addPair(start1=100, start2=100, unmapped2=true).exists(_.isConsensus) shouldBe false
}

it should "return true for reads with consensus tags" in {
val builder = new SamBuilder(sort=Some(SamOrder.Coordinate), readLength=10, baseQuality=20)
AllPerReadTags.forall(
tag => builder.addFrag(start=10, attrs=Map(tag -> 123)).exists(_.isConsensus)
) shouldBe true
}
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import com.fulcrumgenomics.bam.api.SamOrder
import com.fulcrumgenomics.commons.util.SimpleCounter
import com.fulcrumgenomics.testing.SamBuilder.{Minus, Plus}
import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec}
import com.fulcrumgenomics.umi.ConsensusTags.PerRead.AllPerReadTags
import com.fulcrumgenomics.util.{Io, Metric, Rscript}
import htsjdk.samtools.util.{Interval, IntervalList}
import org.apache.commons.math3.distribution.NormalDistribution
Expand All @@ -41,6 +42,7 @@ import scala.math.{max, min}
class CollectDuplexSeqMetricsTest extends UnitSpec {
private val MI = "MI"
private val RX = "RX"
private val aD = "aD"

// Case class to hold pointers to all the outputs of CollectDuplexSeqMetrics
private case class Outputs(families: Path, duplexFamilies: Path, umis: Path, duplexUmis: Path, yields: Path, plots: Path) {
Expand Down Expand Up @@ -312,6 +314,14 @@ class CollectDuplexSeqMetricsTest extends UnitSpec {
duplexFamilies.find(f => f.ab_size == 6 && f.ba_size == 0).get.count shouldBe 1
}

it should "raise an exception if consensus records are provided" in {
val builder = new SamBuilder(readLength=100, sort=Some(SamOrder.TemplateCoordinate))
AllPerReadTags.foreach {
tag => builder.addPair(contig=1, start1=1000, start2=1100, attrs=Map(RX -> "AAA-GGG", MI -> "1/A", tag -> 10))
an[IllegalArgumentException] shouldBe thrownBy { exec(builder) }
}
}

"CollectDuplexSeqMetrics.updateUmiMetrics" should "not count duplex umis" in collector(duplex=false).foreach { c =>
val builder = new SamBuilder(readLength=10)
builder.addPair(start1=100, start2=200, attrs=Map(RX -> "AAA-CCC", MI -> "1/A"))
Expand Down
Loading