From 4232757e3abdd122d684e9dce1848c8e1ab77639 Mon Sep 17 00:00:00 2001 From: clintval Date: Mon, 22 Apr 2024 09:37:51 -0700 Subject: [PATCH 1/4] Make PileupBuilder.includeMapPositionsOutsideFrInsert intuitively correct --- .../bam/pileup/PileupBuilder.scala | 15 +++----- .../fulcrumgenomics/testing/SamBuilder.scala | 12 +++--- .../bam/pileup/PileupBuilderTest.scala | 38 +++++++++++++++++++ 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala b/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala index 1f0bba4b8..0db04bb5e 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala @@ -116,14 +116,11 @@ object PileupBuilder { ) } - /** Returns true if is in a mapped FR pair but the position is outside the insert coordinates of . - * Returns false if is in a mapped FR pair and the position is inside the insert coordinates of or - * is not in a mapped FR pair. - */ - private def positionIsOutsideFrInsert(rec: SamRecord, refIndex: Int, pos: Int): Boolean = { - rec.isFrPair && { + /** Returns true if is in a mapped FR pair and the position is inside the insert coordinates of . */ + private def positionIsInsideFrInsert(rec: SamRecord, refIndex: Int, pos: Int): Boolean = { + refIndex == rec.refIndex && rec.isFrPair && { val (start, end) = Bams.insertCoordinates(rec) - rec.refIndex == refIndex && pos >= start && pos <= end + pos >= start && pos <= end } } @@ -200,8 +197,8 @@ trait PileupBuilder extends PileupParameters { if (compare) compare = this.acceptRecord(rec) if (compare) compare = rec.end >= pos if (compare) compare = rec.start <= pos || PileupBuilder.startsWithInsertion(rec) - if (compare) compare = if (!includeMapPositionsOutsideFrInsert && rec.isFrPair) { - PileupBuilder.positionIsOutsideFrInsert(rec, refIndex = refIndex, pos = pos) + if (compare) compare = if (rec.paired && !includeMapPositionsOutsideFrInsert) { + PileupBuilder.positionIsInsideFrInsert(rec, refIndex = refIndex, pos = pos) } else { compare } compare } diff --git a/src/main/scala/com/fulcrumgenomics/testing/SamBuilder.scala b/src/main/scala/com/fulcrumgenomics/testing/SamBuilder.scala index 64508991e..f6afda284 100644 --- a/src/main/scala/com/fulcrumgenomics/testing/SamBuilder.scala +++ b/src/main/scala/com/fulcrumgenomics/testing/SamBuilder.scala @@ -24,18 +24,19 @@ package com.fulcrumgenomics.testing -import java.nio.file.Files -import java.text.DecimalFormat -import java.util.concurrent.atomic.AtomicLong - import com.fulcrumgenomics.FgBioDef._ import com.fulcrumgenomics.alignment.Cigar import com.fulcrumgenomics.bam.api.SamRecord.{MissingBases, MissingQuals} import com.fulcrumgenomics.bam.api.{SamOrder, SamRecord, SamSource, SamWriter} import com.fulcrumgenomics.fasta.{SequenceDictionary, SequenceMetadata} import com.fulcrumgenomics.testing.SamBuilder._ +import htsjdk.samtools.SamPairUtil.PairOrientation import htsjdk.samtools._ +import java.nio.file.Files +import java.text.DecimalFormat +import java.util +import java.util.concurrent.atomic.AtomicLong import scala.collection.mutable.ArrayBuffer import scala.util.Random @@ -173,7 +174,8 @@ class SamBuilder(val readLength: Int=100, r2("RG") = this.rg.getId attrs.foreach { case (key, value) => r2(key) = value } - SamPairUtil.setMateInfo(r1.asSam, r2.asSam, true) + val orientations = util.Arrays.asList(PairOrientation.values(): _*) + SamPairUtil.setProperPairAndMateInfo(r1.asSam, r2.asSam, orientations, true) val recs = Seq(r1, r2) this.records ++= recs recs diff --git a/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala b/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala index 13ed46b2d..3dc82cd6a 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala @@ -289,6 +289,26 @@ class PileupBuilderTest extends UnitSpec { piler.safelyClose() } + it should "not filter out single-end records when we are removing records outside the insert of FR pairs" in { + val builder = new SamBuilder(readLength = ReadLength, sd = Some(TestSequenceDictionary), sort = Some(Coordinate)) + + builder.addFrag(name = "q1", contig = Chr1Index, start = 100) + + val source = builder.toSource + val piler = PileupBuilder( + source, + accessPattern = accessPattern, + mappedPairsOnly = false, + includeMapPositionsOutsideFrInsert = true + ) + + piler.pileup(Chr1, 100).depth shouldBe 1 + piler.pileup(Chr1, 100 + ReadLength - 1).depth shouldBe 1 + + source.safelyClose() + piler.safelyClose() + } + it should "filter out records where a position is outside the insert for an FR pair" in { val builder = new SamBuilder(readLength = ReadLength, sd = Some(TestSequenceDictionary), sort = Some(Coordinate)) @@ -323,6 +343,24 @@ class PileupBuilderTest extends UnitSpec { piler.safelyClose() } + it should "exclude records that appear to be in an FR pair but are on different chromosomes" in { + val builder = new SamBuilder(readLength = ReadLength, sd = Some(TestSequenceDictionary), sort = Some(Coordinate)) + + builder.addPair(name = "q1", contig = Chr1Index, contig2 = Some(Chr2Index), start1 = 100, start2 = 125) + + val source = builder.toSource + val piler = PileupBuilder(source, accessPattern = accessPattern, includeMapPositionsOutsideFrInsert = false) + + piler.pileup(Chr1, 100).depth shouldBe 0 + piler.pileup(Chr1, 100 + ReadLength - 1).depth shouldBe 0 + + piler.pileup(Chr2, 125).depth shouldBe 0 + piler.pileup(Chr2, 125 + ReadLength - 1).depth shouldBe 0 + + source.safelyClose() + piler.safelyClose() + } + it should "not filter out records where a position is outside what might look like an 'insert' for a non-FR pair" in { val builder = new SamBuilder(readLength = ReadLength, sd = Some(TestSequenceDictionary), sort = Some(Coordinate)) From 6956ac44ef272d3d46240ca6b662859a83c96c16 Mon Sep 17 00:00:00 2001 From: clintval Date: Wed, 24 Apr 2024 19:57:56 -0700 Subject: [PATCH 2/4] Revert back to the original isFrPair filtering logic and document better --- .../bam/pileup/PileupBuilder.scala | 12 ++++++++++-- .../bam/pileup/PileupBuilderTest.scala | 18 ------------------ 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala b/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala index 0db04bb5e..50c7db322 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala @@ -61,7 +61,15 @@ object PileupBuilder { /** Allow records flagged as supplementary alignments to contribute to a pileup by default. */ val includeSupplementalAlignments: Boolean = false - /** Exclude any record of an FR pair where the site requested is outside the insert by default. */ + /** For FR pairs only, determine if we should keep the pair of reads if the site requested is outside of the + * insert. By default this filter is set to which means for FR pairs where R1 and R2 overlap each other + * and *extend* past the start coordinates of their mate, the pair will be filtered out if the position requested + * overlaps the end of either read in the span that is beyond the start coordinate of the read's mate. + * + * See the following GitHub issue comment for a visualization of when this filter applies: + * + * - https://github.com/fulcrumgenomics/fgbio/issues/980#issuecomment-2075049301 + */ val includeMapPositionsOutsideFrInsert: Boolean = false } @@ -197,7 +205,7 @@ trait PileupBuilder extends PileupParameters { if (compare) compare = this.acceptRecord(rec) if (compare) compare = rec.end >= pos if (compare) compare = rec.start <= pos || PileupBuilder.startsWithInsertion(rec) - if (compare) compare = if (rec.paired && !includeMapPositionsOutsideFrInsert) { + if (compare) compare = if (!includeMapPositionsOutsideFrInsert && rec.isFrPair) { PileupBuilder.positionIsInsideFrInsert(rec, refIndex = refIndex, pos = pos) } else { compare } compare diff --git a/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala b/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala index 3dc82cd6a..faf831113 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala @@ -343,24 +343,6 @@ class PileupBuilderTest extends UnitSpec { piler.safelyClose() } - it should "exclude records that appear to be in an FR pair but are on different chromosomes" in { - val builder = new SamBuilder(readLength = ReadLength, sd = Some(TestSequenceDictionary), sort = Some(Coordinate)) - - builder.addPair(name = "q1", contig = Chr1Index, contig2 = Some(Chr2Index), start1 = 100, start2 = 125) - - val source = builder.toSource - val piler = PileupBuilder(source, accessPattern = accessPattern, includeMapPositionsOutsideFrInsert = false) - - piler.pileup(Chr1, 100).depth shouldBe 0 - piler.pileup(Chr1, 100 + ReadLength - 1).depth shouldBe 0 - - piler.pileup(Chr2, 125).depth shouldBe 0 - piler.pileup(Chr2, 125 + ReadLength - 1).depth shouldBe 0 - - source.safelyClose() - piler.safelyClose() - } - it should "not filter out records where a position is outside what might look like an 'insert' for a non-FR pair" in { val builder = new SamBuilder(readLength = ReadLength, sd = Some(TestSequenceDictionary), sort = Some(Coordinate)) From dae6e978398b43df22741380656af4994600f883 Mon Sep 17 00:00:00 2001 From: clintval Date: Thu, 25 Apr 2024 09:11:17 -0700 Subject: [PATCH 3/4] chore: update test name --- .../com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala b/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala index faf831113..842651bc3 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala @@ -289,7 +289,7 @@ class PileupBuilderTest extends UnitSpec { piler.safelyClose() } - it should "not filter out single-end records when we are removing records outside the insert of FR pairs" in { + it should "not filter out single-end records when we are not filter for mapped pairs only and we are not removing records where the position is outside the insert of FR pairs" in { val builder = new SamBuilder(readLength = ReadLength, sd = Some(TestSequenceDictionary), sort = Some(Coordinate)) builder.addFrag(name = "q1", contig = Chr1Index, start = 100) From c02e0b376a3bfba139ce1266f3085f9fd490042a Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Thu, 25 Apr 2024 12:11:41 -0700 Subject: [PATCH 4/4] fix: codecov try specifying the environment (#982) --- .github/workflows/unittests.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/unittests.yaml b/.github/workflows/unittests.yaml index d1e897ecd..aa3e373e5 100644 --- a/.github/workflows/unittests.yaml +++ b/.github/workflows/unittests.yaml @@ -5,6 +5,7 @@ on: [push, pull_request] jobs: test: runs-on: ubuntu-latest + environment: github-actions steps: - name: Checkout uses: actions/checkout@v2