Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add an option to store sample base qualities in the QT for FastqToBam #933

Merged
merged 3 commits into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/main/scala/com/fulcrumgenomics/fastq/FastqToBam.scala
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class FastqToBam
@arg(flag='s', doc="If true, queryname sort the BAM file, otherwise preserve input order.") val sort: Boolean = false,
@arg(flag='u', doc="Tag in which to store molecular barcodes/UMIs.") val umiTag: String = ConsensusTags.UmiBases,
@arg(flag='q', doc="Tag in which to store molecular barcode/UMI qualities.") val umiQualTag: Option[String] = None,
@arg(flag='Q', doc="Store the sample barcode qualities in the QT Tag.") val storeSampleBarcodeQualities: Boolean = false,
@arg(flag='n', doc="Extract UMI(s) from read names and prepend to UMIs from reads.") val extractUmisFromReadNames: Boolean = false,
@arg( doc="Read group ID to use in the file header.") val readGroupId: String = "A",
@arg( doc="The name of the sequenced sample.") val sample: String,
Expand Down Expand Up @@ -157,6 +158,7 @@ class FastqToBam
try {
val subs = fqs.iterator.zip(structures.iterator).flatMap { case(fq, rs) => rs.extract(fq.bases, fq.quals) }.toIndexedSeq
val sampleBarcode = subs.iterator.filter(_.kind == SampleBarcode).map(_.bases).mkString("-")
val sampleQuals = subs.iterator.filter(_.kind == SampleBarcode).map(_.quals).mkString(" ")
val umi = subs.iterator.filter(_.kind == MolecularBarcode).map(_.bases).mkString("-")
val umiQual = subs.iterator.filter(_.kind == MolecularBarcode).map(_.quals).mkString(" ")
val templates = subs.iterator.filter(_.kind == Template).toList
Expand All @@ -166,7 +168,7 @@ class FastqToBam

templates.zipWithIndex.map { case (read, index) =>
// If the template read had no bases, we'll substitute in a single N @ Q2 below to keep htsjdk happy
val empty = read.bases.length == 0
val empty = read.bases.isEmpty

val rec = SamRecord(header)
rec(ReservedTagConstants.READ_GROUP_ID) = this.readGroupId
Expand All @@ -181,6 +183,7 @@ class FastqToBam
}

if (sampleBarcode.nonEmpty) rec("BC") = sampleBarcode
if (storeSampleBarcodeQualities && sampleQuals.nonEmpty) rec("QT") = sampleQuals

// Set the UMI on the read depending on whether we got UMIs from the read names, reads or both
(umi, umiFromReadName) match {
Expand Down
28 changes: 28 additions & 0 deletions src/test/scala/com/fulcrumgenomics/fastq/FastqToBamTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,34 @@ class FastqToBamTest extends UnitSpec {
recs(3).basesString shouldBe "CCCCCCCCCC"
}

it should "extract sample barcode qualities when requested" in {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@galaxy001, take a look at the inputs and outputs and see if this suffices.

val r1 = fq(
FastqRecord("q1:2:3:4:5:6:7", "AAACCCAAAA", "ABCDEFGHIJ"),
FastqRecord("q2:2:3:4:5:6:7", "TAANNNAAAA", "BCDEFGHIJK"),
FastqRecord("q3:2:3:4:5:6:7", "GAACCCTCGA", "CDEFGHIJKL"),
)
val bam = makeTempFile("fastqToBamTest.", ".bam")

Seq(true, false).foreach { storeSampleBarcodeQualities =>
new FastqToBam(input = Seq(r1), output = bam, sample = "s", library = "l", readStructures = Seq(rs("3B3M3B+T")), storeSampleBarcodeQualities = storeSampleBarcodeQualities).execute()
val recs = readBamRecs(bam)
recs should have size 3
recs(0).apply[String]("BC") shouldBe "AAA-AAA"
recs(1).apply[String]("BC") shouldBe "TAA-AAA"
recs(2).apply[String]("BC") shouldBe "GAA-TCG"

if (storeSampleBarcodeQualities) {
recs(0).apply[String]("QT") shouldBe "ABC GHI"
recs(1).apply[String]("QT") shouldBe "BCD HIJ"
recs(2).apply[String]("QT") shouldBe "CDE IJK"
} else {
Range.inclusive(0, 2).foreach { index =>
recs(index).contains("QT") shouldBe false
}
}
}
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you either add a second pass here, or assert in one of the other tests, that if you don't supply a value for storeSampleBarcodeQualities that the QT tag doesn't get set with anything?

it should "extract UMIs from read names when requested" in {
val r1 = fq(
FastqRecord("q1:2:3:4:5:6:7:ACGT", "AAAAAAAAAA", "=========="),
Expand Down