From cb2c2f7c499bbedd170434c8f076bc42c3c5d82e Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 28 Jun 2024 10:11:09 -0700 Subject: [PATCH] Streamlined the BlockThresholds definition by re-using Thresholds. This allows us to easily create a BlockThresholds from an existing Thresholds, which cuts down on redundant code. It also supports applications that need to return a constant-typed object from code that may or may not use blocks; in such cases, we can just promote a Thresholds object to BlockThresholds. --- include/scran/choose_filter_thresholds.hpp | 73 +++++++++++----------- tests/src/choose_filter_thresholds.cpp | 22 +++++-- 2 files changed, 54 insertions(+), 41 deletions(-) diff --git a/include/scran/choose_filter_thresholds.hpp b/include/scran/choose_filter_thresholds.hpp index f8c6c80..d0228fa 100644 --- a/include/scran/choose_filter_thresholds.hpp +++ b/include/scran/choose_filter_thresholds.hpp @@ -22,6 +22,12 @@ namespace scran { * Any outlier values are indicative of low-quality cells that should be filtered out. * Given an array of values, outliers are defined as those that are more than some number of median absolute deviations (MADs) from the median value. * Outliers can be defined in both directions or just a single direction, depending on the interpretation of the QC metric. + * + * For datasets with multiple blocks, we can also compute block-specific thresholds for each metric. + * This assumes that differences in the metric distributions between blocks are driven by uninteresting causes (e.g., differences in sequencing depth); + * variable thresholds can adapt to each block's distribution for effective removal of outliers. + * However, if the differences in the distributions between blocks are interesting, + * it may be preferable to ignore the blocking factor so that the MADs are correctly increased to reflect that variation. */ namespace choose_filter_thresholds { @@ -113,15 +119,6 @@ std::pair choose(Float_ median, Float_ mad, const Options& optio template class Thresholds { public: - /** - * @cond - */ - Thresholds() = default; - static_assert(std::is_floating_point::value); - /** - * @endcond - */ - /** * @param mm Median and MAD, typically from `find_median_mad::compute()`. * @param options Further options. @@ -132,8 +129,21 @@ class Thresholds { my_upper = choice.second; } + /** + * @param lower Lower threshold. + * @param upper Upper threshold. + */ + Thresholds(Float_ lower, Float_ upper) : my_lower(lower), my_upper(upper) {} + + /** + * Default constructor. + */ + Thresholds() = default; + private: - Float_ my_lower, my_upper; + Float_ my_lower = 0; + Float_ my_upper = 0; + static_assert(std::is_floating_point::value); public: /** @@ -217,49 +227,38 @@ class Thresholds { template class BlockThresholds { public: - /** - * @cond - */ - BlockThresholds() = default; - static_assert(std::is_floating_point::value); - /** - * @endcond - */ - /** * @param mm Median and MAD for each block, typically from `find_median_mad::compute_blocked()`. * @param options Further options. */ BlockThresholds(const std::vector >& mm, const Options& options) { size_t nblocks = mm.size(); - my_lower.reserve(nblocks); - my_upper.reserve(nblocks); - + my_thresholds.reserve(nblocks); for (size_t b = 0; b < nblocks; ++b) { - auto out = internal::choose(mm[b].median, mm[b].mad, options); - my_lower.push_back(out.first); - my_upper.push_back(out.second); + my_thresholds.emplace_back(mm[b], options); } } + /** + * @param thresholds Vector of thresholds, one per block. + */ + BlockThresholds(std::vector > thresholds) : my_thresholds(std::move(thresholds)) {} + + /** + * Default constructor. + */ + BlockThresholds() = default; + private: - std::vector my_lower, my_upper; + std::vector > my_thresholds; public: /** * @return Vector of lower thresholds, one per batch. * Cells where the relevant QC metric is below this threshold are considered to be low quality. */ - const std::vector& get_lower() const { - return my_lower; - } - - /** - * @return Vector of upper thresholds, one per batch. - * Cells where the relevant QC metric is above this threshold are considered to be low quality. - */ - const std::vector& get_upper() const { - return my_upper; + const std::vector >& get_thresholds() const { + return my_thresholds; } public: @@ -273,7 +272,7 @@ class BlockThresholds { */ template bool filter(Float_ x, Block_ b) const { - return x >= my_lower[b] && x <= my_upper[b]; // NaNs don't compare true in any comparison so they get auto-discarded here. + return my_thresholds[b].filter(x); } /** diff --git a/tests/src/choose_filter_thresholds.cpp b/tests/src/choose_filter_thresholds.cpp index a1b0756..be9c311 100644 --- a/tests/src/choose_filter_thresholds.cpp +++ b/tests/src/choose_filter_thresholds.cpp @@ -123,10 +123,10 @@ TEST(ChooseFilterThresholds, Blocked) { scran::choose_filter_thresholds::Options opt; scran::choose_filter_thresholds::BlockThresholds thresholds(mms, opt); - EXPECT_DOUBLE_EQ(thresholds.get_lower()[0], 0.4); - EXPECT_DOUBLE_EQ(thresholds.get_upper()[0], 1.6); - EXPECT_DOUBLE_EQ(thresholds.get_lower()[1], 1.7); - EXPECT_DOUBLE_EQ(thresholds.get_upper()[1], 2.3); + EXPECT_DOUBLE_EQ(thresholds.get_thresholds()[0].get_lower(), 0.4); + EXPECT_DOUBLE_EQ(thresholds.get_thresholds()[0].get_upper(), 1.6); + EXPECT_DOUBLE_EQ(thresholds.get_thresholds()[1].get_lower(), 1.7); + EXPECT_DOUBLE_EQ(thresholds.get_thresholds()[1].get_upper(), 2.3); std::vector metrics { 0, 0.5, 1.0, 1.5, 1.8, 2.1, 3.0, std::numeric_limits::quiet_NaN() }; @@ -190,3 +190,17 @@ TEST(ChooseFilterThresholds, Overwrite) { EXPECT_EQ(buffer, expected); } } + +TEST(ChooseFilterThresholds, Constructors) { + scran::choose_filter_thresholds::Thresholds def; + def = scran::choose_filter_thresholds::Thresholds(1, 2); + EXPECT_EQ(def.get_lower(), 1); + EXPECT_EQ(def.get_upper(), 2); + + scran::choose_filter_thresholds::BlockThresholds bdef; + bdef = scran::choose_filter_thresholds::BlockThresholds({ def }); + EXPECT_EQ(bdef.get_thresholds().size(), 1); + EXPECT_EQ(bdef.get_thresholds().front().get_lower(), 1); + EXPECT_EQ(bdef.get_thresholds().front().get_upper(), 2); +} +