Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add helpers for mateCigar and matesOverlap on SamRecord #717

Merged
merged 8 commits into from
Sep 28, 2021
Merged
19 changes: 14 additions & 5 deletions src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import com.fulcrumgenomics.alignment.Cigar
import htsjdk.samtools
import htsjdk.samtools.SamPairUtil.PairOrientation
import htsjdk.samtools._
import htsjdk.samtools.util.CoordMath

// TODO long-term: methods for 5'end, 3' end, unclipped 5' end and unclipped 3' end
// TODO long-term: replacement for alignment blocks?
Expand Down Expand Up @@ -169,22 +170,30 @@ trait SamRecord {

@inline final def mateStart: Int = getMateAlignmentStart
@inline final def mateStart_=(s: Int):Unit = setMateAlignmentStart(s)

@inline final def mateCigar: Option[Cigar] = {
require(paired && mateMapped, "Cannot get a mate cigar on read without a mapped mate.")
get[String]("MC").map(Cigar.apply)
}
@inline final def mateEnd: Option[Int] = {
require(paired && mateMapped, "Cannot get mate end position on read without a mapped mate.")
get[String]("MC").map(cig => mateStart + Cigar(cig).lengthOnTarget - 1)
mateCigar.map(_.lengthOnTarget + mateStart - 1)
}

@inline final def mateUnclippedStart: Option[Int] = {
require(paired && mateMapped, "Cannot get mate unclipped start position on read without a mapped mate.")
get[String]("MC").map(cig => mateStart - Cigar(cig).iterator.takeWhile(_.operator.isClipping).map(_.length).sum)
mateCigar.map(cigar => mateStart - cigar.iterator.takeWhile(_.operator.isClipping).map(_.length).sum)
}
@inline final def mateUnclippedEnd: Option[Int] = {
require(paired && mateMapped, "Cannot get mate unclipped end position on read without a mapped mate.")
get[String]("MC").map { cig =>
val cigar = Cigar(cig)
mateCigar.map { cigar =>
mateStart + cigar.lengthOnTarget - 1 + cigar.reverseIterator.takeWhile(_.operator.isClipping).map(_.length).sum
}
}
@inline final def mateOverlaps: Option[Boolean] = {
Option.when(mapped && paired && mateMapped) {
clintval marked this conversation as resolved.
Show resolved Hide resolved
refIndex == mateRefIndex && mateEnd.exists { mateEnd => CoordMath.overlaps(start, end, mateStart, mateEnd) }
}
}

@inline final def insertSize: Int = getInferredInsertSize
@inline final def insertSize_=(s: Int):Unit = setInferredInsertSize(s)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,8 @@ object EstimateRnaSeqInsertSize {
private def DifferentReferenceIndexFilter = filter(r => r.refIndex != r.mateRefIndex)

private[rnaseq] def getAndRequireMateCigar(rec: SamRecord): Cigar = {
rec.get[String](SAMTag.MC.name()) match {
case None => throw new IllegalStateException(s"Mate CIGAR (Tag MC) not found for $rec, consider using SetMateInformation.")
case Some(mc) => Cigar(mc)
rec.mateCigar.getOrElse {
throw new IllegalStateException(s"Mate CIGAR (Tag 'MC') not found for $rec, consider using SetMateInformation.")
}
}

Expand Down
64 changes: 62 additions & 2 deletions src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ package com.fulcrumgenomics.bam.api
import com.fulcrumgenomics.alignment.Cigar
import com.fulcrumgenomics.testing.SamBuilder.{Minus, Plus}
import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec}
import htsjdk.samtools.{SAMTag, TextCigarCodec}
import htsjdk.samtools.TextCigarCodec
import org.scalatest.OptionValues

class SamRecordTest extends UnitSpec with OptionValues {
Expand Down Expand Up @@ -186,12 +186,41 @@ class SamRecordTest extends UnitSpec with OptionValues {
rec.basesString should not be clone.basesString
}

"SamRecord.mateCigar" should "raise an exception for a fragment without a mate" in {
val Some(rec) = new SamBuilder(readLength = 100).addFrag(start = 1, cigar = "100M")
an[IllegalArgumentException] shouldBe thrownBy { rec.mateCigar }
}

it should "raise an exception for a paired read if the mate is unmapped" in {
val Seq(rec1, _): Seq[SamRecord] = new SamBuilder(readLength = 100).addPair(start1 = 1, unmapped2 = true)
an[IllegalArgumentException] shouldBe thrownBy { rec1.mateCigar }
}

it should "return None for a paired read without the 'MC' tag set" in {
val Seq(rec1, rec2): Seq[SamRecord] = new SamBuilder(readLength = 100).addPair(start1 = 1, start2 = 200)
rec1.remove("MC")
rec2.remove("MC")
rec1.mateCigar shouldBe None
rec2.mateCigar shouldBe None
}

it should "return the mates cigar for a paired read with the 'MC' tag set" in {
val Seq(rec1, rec2): Seq[SamRecord] = new SamBuilder(readLength = 100).addPair(
start1 = 1,
start2 = 200,
cigar1 = "50M1I49M",
cigar2 = "49M1I50M"
)
rec1.mateCigar.value shouldBe Cigar("49M1I50M")
rec2.mateCigar.value shouldBe Cigar("50M1I49M")
}

"SamRecord.mateUnclippedStart/End" should "return None if no mate cigar set" in {
val builder = new SamBuilder(readLength=50)
val recs = builder.addPair(start1=10, start2=50)

recs.foreach { r =>
r(SAMTag.MC.name) = null // Clear mate cigar
r("MC") = null // Clear mate cigar
r.mateUnclippedStart.isEmpty shouldBe true
r.mateUnclippedEnd.isEmpty shouldBe true
}
Expand Down Expand Up @@ -221,4 +250,35 @@ class SamRecordTest extends UnitSpec with OptionValues {
r2.unclippedEnd shouldBe 94
r1.mateUnclippedEnd.value shouldBe r2.unclippedEnd
}

"SamRecord.mateOverlaps" should "return None if any pre-requisite check isn't true" in {
new SamBuilder(readLength = 100).addFrag(start = 1).value.mateOverlaps shouldBe None
new SamBuilder(readLength = 100).addPair(start1 = 1, unmapped2 = true).foreach(_.mateOverlaps shouldBe None)
new SamBuilder(readLength = 100).addPair(start2 = 1, unmapped1 = true).foreach(_.mateOverlaps shouldBe None)
}

it should "return false when both reads in a pair are mapped but on different contigs" in {
new SamBuilder(readLength = 100).addPair(start1 = 1, start2 = 1, contig = 0, contig2 = Some(1))
.foreach(_.mateOverlaps.value shouldBe false)
}

it should "return false when both reads in a pair are mapped to the same contig but do not overlap" in {
new SamBuilder(readLength = 100).addPair(start1 = 1, start2 = 101, contig = 0, contig2 = Some(0))
.foreach(_.mateOverlaps.value shouldBe false)
}

it should "return true when reads overlap by one base pair in FR orientation" in {
new SamBuilder(readLength = 100).addPair(start1 = 1, start2 = 100, contig = 0, contig2 = Some(0))
.foreach(_.mateOverlaps.value shouldBe true)
}

it should "return true when reads overlap by one base pair in RF orientation" in {
new SamBuilder(readLength = 100).addPair(start1 = 100, start2 = 1, contig = 0, contig2 = Some(0))
.foreach(_.mateOverlaps.value shouldBe true)
}

it should "return true when reads overlap completely" in {
new SamBuilder(readLength = 100).addPair(start1 = 1, start2 = 1, contig = 0, contig2 = Some(0))
.foreach(_.mateOverlaps.value shouldBe true)
}
}