From 90a2ec42b6c4ca8a601de7beaea5913b76d24f10 Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Tue, 3 Dec 2024 13:18:30 +0100 Subject: [PATCH] remove the .zarr component of the PositionName if present, and make further assertions about the PositionName; close #398 --- src/main/scala/LabelAndFilterLocusSpots.scala | 62 +++++++++++++------ .../scala/TestLabelAndFilterLocusSpots.scala | 20 +++++- 2 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/main/scala/LabelAndFilterLocusSpots.scala b/src/main/scala/LabelAndFilterLocusSpots.scala index d75ffea6..cf2cfeb0 100644 --- a/src/main/scala/LabelAndFilterLocusSpots.scala +++ b/src/main/scala/LabelAndFilterLocusSpots.scala @@ -3,7 +3,7 @@ package at.ac.oeaw.imba.gerlich.looptrace import scala.util.Try import upickle.default.* import cats.* -import cats.data.{ NonEmptyList as NEL, ValidatedNel } +import cats.data.{ EitherNel, NonEmptyList, ValidatedNel } import cats.syntax.all.* import mouse.boolean.* import scopt.OParser @@ -149,7 +149,7 @@ object LabelAndFilterLocusSpots extends ScoptCliReaders, StrictLogging: distanceToReferenceColumn = "ref_dist" ) - final case class ParseError(errorMessages: NEL[String]) extends Exception(s"${errorMessages.size} errors: ${errorMessages}") + final case class ParseError(errorMessages: NonEmptyList[String]) extends Exception(s"${errorMessages.size} errors: ${errorMessages}") end ParserConfig val parserBuilder = OParser.builder[CliConfig] @@ -325,7 +325,7 @@ object LabelAndFilterLocusSpots extends ScoptCliReaders, StrictLogging: parsePixelY, parsePixelX, ) => { (record: Array[String]) => - (record.length === header.length).either(NEL.one(s"Record has ${record.length} fields but header has ${header.length}"), ()).flatMap{ + (record.length === header.length).either(NonEmptyList.one(s"Record has ${record.length} fields but header has ${header.length}"), ()).flatMap{ Function.const{( parseFov(record), parseRegion(record), @@ -402,7 +402,7 @@ object LabelAndFilterLocusSpots extends ScoptCliReaders, StrictLogging: basename: String, delimiter: Delimiter, overwrite: Boolean = false, - ): Unit = { + ): Unit = { if (!os.isDir(analysisOutfolder)) { os.makeDir.all(analysisOutfolder) } if (!os.isDir(pointsOutfolder)) { os.makeDir.all(pointsOutfolder) } // Include placeholder for field for label displayability column, which we don't need for CSV writing (only JSON, handled via codec). @@ -493,25 +493,45 @@ object LabelAndFilterLocusSpots extends ScoptCliReaders, StrictLogging: } } - private def stripZarrPrefixFromPositionName(rawPosName: String): String = - // TODO: regex match here on the prefix, then replace the .zarr match. - // Cases: no match --> use natural, 1 match --> excise .zarr, > 1 match --> error. - ??? + // Ensure that the .zarr suffix which is sometimes present in the + private[looptrace] def stripZarrPrefixFromPositionName(rawPosName: String): EitherNel[String, String] = + val modified = rawPosName.replaceAll(".zarr", "") // Account for the possibility of the .zarr polluting the true position name. + val expPrefix = "P" + val hasExpPrefix = + modified.startsWith(expPrefix).validatedNel(s"Missing expected prefix ($expPrefix)", ()) + val hasExpLength = + val expLength = expPrefix.length + 4 // Expect exactly 4 digits. + (modified.length === expLength).validatedNel(s"Unexpected length: ${modified.length}, not $expLength", ()) + val allDigitsAfterPrefix = + modified.tail + .filterNot(_.isDigit) + .toList.toNel + .toLeft(()) + .leftMap{ nonDigits => NonEmptyList.one(s"${nonDigits.length} non-digit character after prefix") } + .toValidated + (hasExpPrefix, hasExpLength, allDigitsAfterPrefix) + .tupled + .map(_ => modified) + .toEither + private def writePointsForNapari(folder: os.Path)(groupedByPos: List[(PositionName, List[TraceRecordPair])], roundsConfig: ImagingRoundsConfiguration) = { import NapariSortKey.given import NapariSortKey.* - val getOutfileAndHeader = (pos: PositionName, qcType: PointDisplayType) => { - val posNameBase = stripZarrPrefixFromPositionName(pos.show_) - val fp = folder / s"$posNameBase.${qcType.toString.toLowerCase}.csv" - val baseHeader = List("regionTime", "traceId", "locusTime", "traceIndex", "timeIndex", "z", "y", "x") - val header = qcType match { - case PointDisplayType.QCPass => baseHeader - case PointDisplayType.QCFail => baseHeader :+ "failCode" - case PointDisplayType.Invisible => throw new RuntimeException("Tried to create output file for invisible point type!") + + val getOutfileAndHeader: (PositionName, PointDisplayType) => EitherNel[String, (os.Path, List[String])] = + (pos: PositionName, qcType: PointDisplayType) => { + stripZarrPrefixFromPositionName(pos.show_).map{ posNameBase => + val fp = folder / s"$posNameBase.${qcType.toString.toLowerCase}.csv" + val baseHeader = List("regionTime", "traceId", "locusTime", "traceIndex", "timeIndex", "z", "y", "x") + val header = qcType match { + case PointDisplayType.QCPass => baseHeader + case PointDisplayType.QCFail => baseHeader :+ "failCode" + case PointDisplayType.Invisible => throw new RuntimeException("Tried to create output file for invisible point type!") + } + fp -> header + } } - fp -> header - } groupedByPos .flatMap{ (pos, traceRecordPairs) => traceRecordPairs.toNel.map(pos -> _) } @@ -532,6 +552,12 @@ object LabelAndFilterLocusSpots extends ScoptCliReaders, StrictLogging: } } val (outfile, header) = getOutfileAndHeader(pos, qcType) + .leftMap{ problems => + new Exception( + s"${problems.length} problem(s) getting header and output file for position name $pos: ${problems.mkString_("; ")}" + ) + } + .fold(throw _, identity) val outrecs = traceRecordPairs.map{ (t, r) => val p = r.centerInPixels val timeIndex = ( diff --git a/src/test/scala/TestLabelAndFilterLocusSpots.scala b/src/test/scala/TestLabelAndFilterLocusSpots.scala index 5943348e..8f839533 100644 --- a/src/test/scala/TestLabelAndFilterLocusSpots.scala +++ b/src/test/scala/TestLabelAndFilterLocusSpots.scala @@ -1,6 +1,8 @@ package at.ac.oeaw.imba.gerlich.looptrace import scala.io.Source +import cats.data.{ EitherNel, NonEmptyList } +import cats.syntax.all.* import org.scalacheck.{ Gen, Shrink } import org.scalactic.Equality import org.scalatest.funsuite.AnyFunSuite @@ -10,13 +12,14 @@ import org.scalatest.prop.Configuration.PropertyCheckConfiguration import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks import com.github.tototoshi.csv.* -import at.ac.oeaw.imba.gerlich.gerlib.imaging.ImagingTimepoint +import at.ac.oeaw.imba.gerlich.gerlib.imaging.{ ImagingTimepoint, PositionName } import at.ac.oeaw.imba.gerlich.gerlib.numeric.* import at.ac.oeaw.imba.gerlich.looptrace.LabelAndFilterLocusSpots.{ ParserConfig, QcPassColumn, workflow } import at.ac.oeaw.imba.gerlich.looptrace.LocusSpotQC.* import at.ac.oeaw.imba.gerlich.looptrace.PathHelpers.* import at.ac.oeaw.imba.gerlich.looptrace.syntax.all.* +import org.scalatest.prop.TableFor2 /** Tests for the filtration of the individual supports (single FISH probes) of chromatin fiber traces */ class TestLabelAndFilterLocusSpots extends AnyFunSuite, ScalaCheckPropertyChecks, GenericSuite, should.Matchers: @@ -226,6 +229,21 @@ class TestLabelAndFilterLocusSpots extends AnyFunSuite, ScalaCheckPropertyChecks } } + test("PositionName cleanup works as expected"): + import at.ac.oeaw.imba.gerlich.looptrace.LabelAndFilterLocusSpots.stripZarrPrefixFromPositionName as cleanupPositionName + val testCases: TableFor2[String, EitherNel[String, String]] = + import io.github.iltotore.iron.autoRefine + Table( + ("argument", "expectedResult"), + ("P0001.zarr", "P0001".asRight), + ("P0002", "P0002".asRight), + ("P00001.zarr", NonEmptyList.one("Unexpected length: 6, not 5").asLeft), + ("P00001", NonEmptyList.one("Unexpected length: 6, not 5").asLeft) + ) + forAll (testCases) { (argument, expectedResult) => + cleanupPositionName(argument) shouldEqual expectedResult + } + /* Ancillary functions and types */ type CsvRows = Iterable[Map[String, String]] private def componentExpectationFile = getResourcePath("traces.labeled.unfiltered.csv")