From 6aa20abfdc93bb58f135c8b00a2aee389f7eb93a Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Mon, 18 Dec 2023 16:59:48 -0700 Subject: [PATCH 1/4] ZipperBams to produce mate score ("ms") for samtools markdup Fixes: #951 --- src/main/scala/com/fulcrumgenomics/bam/Bams.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/scala/com/fulcrumgenomics/bam/Bams.scala b/src/main/scala/com/fulcrumgenomics/bam/Bams.scala index 61d6d52aa..8a422e468 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/Bams.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/Bams.scala @@ -105,16 +105,21 @@ case class Template(r1: Option[SamRecord], Template(x1, x2) } - /** Fixes mate information and sets mate cigar on all primary and supplementary (but not secondary) records. */ + /** Fixes mate information and sets mate cigar and mate score on all primary and supplementary (but not secondary) records. */ def fixMateInfo(): Unit = { + // Developer note: the mate score ("ms") tag is used by samtools markdup for (primary <- r1; supp <- r2Supplementals) { SamPairUtil.setMateInformationOnSupplementalAlignment(supp.asSam, primary.asSam, true) + supp("ms") = primary[Int]("AS") } for (primary <- r2; supp <- r1Supplementals) { SamPairUtil.setMateInformationOnSupplementalAlignment(supp.asSam, primary.asSam, true) + supp("ms") = primary[Int]("AS") } for (first <- r1; second <- r2) { SamPairUtil.setMateInfo(first.asSam, second.asSam, true) + first("ms") = second[Int]("AS") + second("ms") = first[Int]("AS") } } From 7e33b829742e47c85c50e86a1a2048d2caa7a6c1 Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Wed, 27 Dec 2023 16:34:14 -0700 Subject: [PATCH 2/4] Update src/main/scala/com/fulcrumgenomics/bam/Bams.scala --- src/main/scala/com/fulcrumgenomics/bam/Bams.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/scala/com/fulcrumgenomics/bam/Bams.scala b/src/main/scala/com/fulcrumgenomics/bam/Bams.scala index 8a422e468..6cf89eee7 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/Bams.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/Bams.scala @@ -110,16 +110,16 @@ case class Template(r1: Option[SamRecord], // Developer note: the mate score ("ms") tag is used by samtools markdup for (primary <- r1; supp <- r2Supplementals) { SamPairUtil.setMateInformationOnSupplementalAlignment(supp.asSam, primary.asSam, true) - supp("ms") = primary[Int]("AS") + primary.get[Int]("AS").foreach(supp("ms") = _)) } for (primary <- r2; supp <- r1Supplementals) { SamPairUtil.setMateInformationOnSupplementalAlignment(supp.asSam, primary.asSam, true) - supp("ms") = primary[Int]("AS") + primary.get[Int]("AS").foreach(supp("ms") = _)) } for (first <- r1; second <- r2) { SamPairUtil.setMateInfo(first.asSam, second.asSam, true) - first("ms") = second[Int]("AS") - second("ms") = first[Int]("AS") + first.get[Int]("AS").foreach(second("ms") = _)) + second.get[Int]("AS").foreach(first("ms") = _)) } } From 2affbac107b17fb1a1fd9a093661f881371f7beb Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Wed, 27 Dec 2023 16:36:51 -0700 Subject: [PATCH 3/4] Update src/main/scala/com/fulcrumgenomics/bam/Bams.scala --- src/main/scala/com/fulcrumgenomics/bam/Bams.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/scala/com/fulcrumgenomics/bam/Bams.scala b/src/main/scala/com/fulcrumgenomics/bam/Bams.scala index 6cf89eee7..623b97d76 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/Bams.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/Bams.scala @@ -110,16 +110,16 @@ case class Template(r1: Option[SamRecord], // Developer note: the mate score ("ms") tag is used by samtools markdup for (primary <- r1; supp <- r2Supplementals) { SamPairUtil.setMateInformationOnSupplementalAlignment(supp.asSam, primary.asSam, true) - primary.get[Int]("AS").foreach(supp("ms") = _)) + primary.get[Int]("AS").foreach(supp("ms") = _) } for (primary <- r2; supp <- r1Supplementals) { SamPairUtil.setMateInformationOnSupplementalAlignment(supp.asSam, primary.asSam, true) - primary.get[Int]("AS").foreach(supp("ms") = _)) + primary.get[Int]("AS").foreach(supp("ms") = _) } for (first <- r1; second <- r2) { SamPairUtil.setMateInfo(first.asSam, second.asSam, true) - first.get[Int]("AS").foreach(second("ms") = _)) - second.get[Int]("AS").foreach(first("ms") = _)) + first.get[Int]("AS").foreach(second("ms") = _) + second.get[Int]("AS").foreach(first("ms") = _) } } From 87e82882ce324ce6f305040e6eb2393d0c4f0498 Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Fri, 9 Feb 2024 10:18:59 -0500 Subject: [PATCH 4/4] add to the unit test a read with ms --- .../com/fulcrumgenomics/bam/BamsTest.scala | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/bam/BamsTest.scala b/src/test/scala/com/fulcrumgenomics/bam/BamsTest.scala index c6d1b699e..6e7a9e1ff 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/BamsTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/BamsTest.scala @@ -473,6 +473,7 @@ class BamsTest extends UnitSpec { builder.addPair(name="q1", contig=1, contig2=Some(2), start1=100, start2=200, cigar1="50M50S", cigar2="50S50M", mapq1=51, mapq2=52) builder.addPair(name="q1", contig=3, contig2=Some(4), start1=300, start2=400, cigar1="50S50M", cigar2="50M50S", mapq1=21, mapq2=22) .foreach(_.supplementary = true) + builder.addPair(name="q2", contig=1, contig2=Some(2), start1=100, start2=200, cigar1="50M50S", cigar2="50S50M", mapq1=51, mapq2=52, attrs=Map(("ms", 2))) val recs = builder.toIndexedSeq.tapEach { r => r.mateMapped = false @@ -482,21 +483,34 @@ class BamsTest extends UnitSpec { r.remove("MQ") } - val template = Template(recs.iterator) - template.fixMateInfo() - - template.allReads.foreach { r => - r.mateMapped shouldBe true - - if (r.firstOfPair) { - r.mateRefIndex shouldBe 2 - r.mateStart shouldBe 200 - r.mateCigar.value.toString() shouldBe "50S50M" - } - else { - r.mateRefIndex shouldBe 1 - r.mateStart shouldBe 100 - r.mateCigar.value.toString() shouldBe "50M50S" + val q1Template = Template(recs.iterator.filter(_.name == "q1")) + val q2Template = Template(recs.iterator.filter(_.name == "q2")) + val templates = Seq(q1Template, q2Template) + + templates.foreach { template => + template.fixMateInfo() + + template.allReads.foreach { r => + r.mateMapped shouldBe true + + if (r.firstOfPair) { + r.mateRefIndex shouldBe 2 + r.mateStart shouldBe 200 + r.mateCigar.value.toString() shouldBe "50S50M" + } + else { + r.mateRefIndex shouldBe 1 + r.mateStart shouldBe 100 + r.mateCigar.value.toString() shouldBe "50M50S" + } + + if (r.name == "q1") { + r.get[Int]("ms").isEmpty shouldBe true + } + else { + r.name shouldBe "q2" + r.get[Int]("ms").value shouldBe 2 + } } } }