Skip to content

Commit

Permalink
ZipperBams to produce mate score ("ms") for samtools markdup (#952)
Browse files Browse the repository at this point in the history
* ZipperBams to produce mate score ("ms") for samtools markdup

Fixes: #951
  • Loading branch information
nh13 authored Feb 9, 2024
1 parent 0a03aa9 commit 3a74fd2
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 16 deletions.
7 changes: 6 additions & 1 deletion src/main/scala/com/fulcrumgenomics/bam/Bams.scala
Original file line number Diff line number Diff line change
Expand Up @@ -105,16 +105,21 @@ case class Template(r1: Option[SamRecord],
Template(x1, x2)
}

/** Fixes mate information and sets mate cigar on all primary and supplementary (but not secondary) records. */
/** Fixes mate information and sets mate cigar and mate score on all primary and supplementary (but not secondary) records. */
def fixMateInfo(): Unit = {
// Developer note: the mate score ("ms") tag is used by samtools markdup
for (primary <- r1; supp <- r2Supplementals) {
SamPairUtil.setMateInformationOnSupplementalAlignment(supp.asSam, primary.asSam, true)
primary.get[Int]("AS").foreach(supp("ms") = _)
}
for (primary <- r2; supp <- r1Supplementals) {
SamPairUtil.setMateInformationOnSupplementalAlignment(supp.asSam, primary.asSam, true)
primary.get[Int]("AS").foreach(supp("ms") = _)
}
for (first <- r1; second <- r2) {
SamPairUtil.setMateInfo(first.asSam, second.asSam, true)
first.get[Int]("AS").foreach(second("ms") = _)
second.get[Int]("AS").foreach(first("ms") = _)
}
}

Expand Down
44 changes: 29 additions & 15 deletions src/test/scala/com/fulcrumgenomics/bam/BamsTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,7 @@ class BamsTest extends UnitSpec {
builder.addPair(name="q1", contig=1, contig2=Some(2), start1=100, start2=200, cigar1="50M50S", cigar2="50S50M", mapq1=51, mapq2=52)
builder.addPair(name="q1", contig=3, contig2=Some(4), start1=300, start2=400, cigar1="50S50M", cigar2="50M50S", mapq1=21, mapq2=22)
.foreach(_.supplementary = true)
builder.addPair(name="q2", contig=1, contig2=Some(2), start1=100, start2=200, cigar1="50M50S", cigar2="50S50M", mapq1=51, mapq2=52, attrs=Map(("ms", 2)))

val recs = builder.toIndexedSeq.tapEach { r =>
r.mateMapped = false
Expand All @@ -482,21 +483,34 @@ class BamsTest extends UnitSpec {
r.remove("MQ")
}

val template = Template(recs.iterator)
template.fixMateInfo()

template.allReads.foreach { r =>
r.mateMapped shouldBe true

if (r.firstOfPair) {
r.mateRefIndex shouldBe 2
r.mateStart shouldBe 200
r.mateCigar.value.toString() shouldBe "50S50M"
}
else {
r.mateRefIndex shouldBe 1
r.mateStart shouldBe 100
r.mateCigar.value.toString() shouldBe "50M50S"
val q1Template = Template(recs.iterator.filter(_.name == "q1"))
val q2Template = Template(recs.iterator.filter(_.name == "q2"))
val templates = Seq(q1Template, q2Template)

templates.foreach { template =>
template.fixMateInfo()

template.allReads.foreach { r =>
r.mateMapped shouldBe true

if (r.firstOfPair) {
r.mateRefIndex shouldBe 2
r.mateStart shouldBe 200
r.mateCigar.value.toString() shouldBe "50S50M"
}
else {
r.mateRefIndex shouldBe 1
r.mateStart shouldBe 100
r.mateCigar.value.toString() shouldBe "50M50S"
}

if (r.name == "q1") {
r.get[Int]("ms").isEmpty shouldBe true
}
else {
r.name shouldBe "q2"
r.get[Int]("ms").value shouldBe 2
}
}
}
}
Expand Down

0 comments on commit 3a74fd2

Please sign in to comment.