From e197ce44233c4a6dcbbb8d9f5da6102cd4209e75 Mon Sep 17 00:00:00 2001 From: Michael Hipp Date: Wed, 21 Jun 2023 16:59:15 -0700 Subject: [PATCH] Add optional validation of kept read ratio to CorrectUmis (#917) --- .../scala/com/fulcrumgenomics/umi/CorrectUmis.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/main/scala/com/fulcrumgenomics/umi/CorrectUmis.scala b/src/main/scala/com/fulcrumgenomics/umi/CorrectUmis.scala index 57c5b3fa2..c21d5af76 100644 --- a/src/main/scala/com/fulcrumgenomics/umi/CorrectUmis.scala +++ b/src/main/scala/com/fulcrumgenomics/umi/CorrectUmis.scala @@ -134,7 +134,8 @@ class CorrectUmis @arg(flag='U', doc="File of UMI sequences, one per line.", minElements=0) val umiFiles: Seq[FilePath] = Seq.empty, @arg(flag='t', doc="Tag in which UMIs are stored.") val umiTag: String = ConsensusTags.UmiBases, @arg(flag='x', doc="Don't store original UMIs upon correction.") val dontStoreOriginalUmis: Boolean = false, - @arg(doc="The number of uncorrected UMIs to cache; zero will disable the cache.") val cacheSize: Int = 100000 + @arg(doc="The number of uncorrected UMIs to cache; zero will disable the cache.") val cacheSize: Int = 100000, + @arg(doc="The minimum ratio of kept UMIs to accept. 
A ratio below this will cause a failure (but all files will still be written).") val minCorrected: Option[Double] = None ) extends FgBioTool with LazyLogging { validate(umis.nonEmpty || umiFiles.nonEmpty, "At least one UMI or UMI file must be provided.") @@ -142,6 +143,7 @@ class CorrectUmis Io.assertReadable(umiFiles) Io.assertCanWriteFile(output) rejects.foreach(Io.assertCanWriteFile(_)) + minCorrected.foreach(m => validate(m >= 0 && m <= 1, "--min-corrected must be between 0 and 1.")) // Construct the cache private lazy val cache = new LeastRecentlyUsedCache[String,UmiMatch](maxEntries = cacheSize) @@ -260,6 +262,14 @@ class CorrectUmis if (wrongLengthRecords > 0) logger.error(s"# ${wrongLengthRecords} had unexpected UMIs of differing lengths in the BAM file!") logger.error("###################################################################") } + + minCorrected.foreach { min => + val ratioKept = if (totalRecords > 0) 1.0 * kept / totalRecords else 1.0 /* empty input: nothing was rejected; avoids 0/0 = NaN, which would fail every threshold (even 0) */ + assert(ratioKept >= min, + f"# Final ratio of reads kept / total was ${ratioKept}%2.2f (user specified minimum was ${min}%2.2f). " + + "This could indicate a mismatch between library preparation and the provided UMI file." + ) + } } /** Given a UMI sequence and a set of fixed UMIs, report the best match. */