Skip to content

Commit

Permalink
Streamlined the BlockThresholds definition by re-using Thresholds.
Browse files Browse the repository at this point in the history
This allows us to easily create a BlockThresholds from an existing Thresholds,
which cuts down on redundant code. It also supports applications that need to
return a constant-typed object from code that may or may not use blocks; in
such cases, we can just promote a Thresholds object to BlockThresholds.
  • Loading branch information
LTLA committed Jun 28, 2024
1 parent 6fd3e22 commit cb2c2f7
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 41 deletions.
73 changes: 36 additions & 37 deletions include/scran/choose_filter_thresholds.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ namespace scran {
* Any outlier values are indicative of low-quality cells that should be filtered out.
* Given an array of values, outliers are defined as those that are more than some number of median absolute deviations (MADs) from the median value.
* Outliers can be defined in both directions or just a single direction, depending on the interpretation of the QC metric.
*
* For datasets with multiple blocks, we can also compute block-specific thresholds for each metric.
* This assumes that differences in the metric distributions between blocks are driven by uninteresting causes (e.g., differences in sequencing depth);
* variable thresholds can adapt to each block's distribution for effective removal of outliers.
* However, if the differences in the distributions between blocks are interesting,
* it may be preferable to ignore the blocking factor so that the MADs are correctly increased to reflect that variation.
*/
namespace choose_filter_thresholds {

Expand Down Expand Up @@ -113,15 +119,6 @@ std::pair<Float_, Float_> choose(Float_ median, Float_ mad, const Options& optio
template<typename Float_>
class Thresholds {
public:
/**
* @cond
*/
Thresholds() = default;
static_assert(std::is_floating_point<Float_>::value);
/**
* @endcond
*/

/**
* @param mm Median and MAD, typically from `find_median_mad::compute()`.
* @param options Further options.
Expand All @@ -132,8 +129,21 @@ class Thresholds {
my_upper = choice.second;
}

/**
* @param lower Lower threshold.
* @param upper Upper threshold.
*/
Thresholds(Float_ lower, Float_ upper) : my_lower(lower), my_upper(upper) {}

/**
* Default constructor.
*/
Thresholds() = default;

private:
Float_ my_lower, my_upper;
Float_ my_lower = 0;
Float_ my_upper = 0;
static_assert(std::is_floating_point<Float_>::value);

public:
/**
Expand Down Expand Up @@ -217,49 +227,38 @@ class Thresholds {
template<typename Float_>
class BlockThresholds {
public:
/**
* @cond
*/
BlockThresholds() = default;
static_assert(std::is_floating_point<Float_>::value);
/**
* @endcond
*/

/**
* @param mm Median and MAD for each block, typically from `find_median_mad::compute_blocked()`.
* @param options Further options.
*/
BlockThresholds(const std::vector<find_median_mad::Results<Float_> >& mm, const Options& options) {
size_t nblocks = mm.size();
my_lower.reserve(nblocks);
my_upper.reserve(nblocks);

my_thresholds.reserve(nblocks);
for (size_t b = 0; b < nblocks; ++b) {
auto out = internal::choose(mm[b].median, mm[b].mad, options);
my_lower.push_back(out.first);
my_upper.push_back(out.second);
my_thresholds.emplace_back(mm[b], options);
}
}

/**
* @param thresholds Vector of thresholds, one per block.
*/
BlockThresholds(std::vector<Thresholds<Float_> > thresholds) : my_thresholds(std::move(thresholds)) {}

/**
* Default constructor.
*/
BlockThresholds() = default;

private:
std::vector<Float_> my_lower, my_upper;
std::vector<Thresholds<Float_> > my_thresholds;

public:
/**
* @return Vector of lower thresholds, one per batch.
* Cells where the relevant QC metric is below this threshold are considered to be low quality.
*/
const std::vector<Float_>& get_lower() const {
return my_lower;
}

/**
* @return Vector of upper thresholds, one per batch.
* Cells where the relevant QC metric is above this threshold are considered to be low quality.
*/
const std::vector<Float_>& get_upper() const {
return my_upper;
const std::vector<Thresholds<Float_> >& get_thresholds() const {
return my_thresholds;
}

public:
Expand All @@ -273,7 +272,7 @@ class BlockThresholds {
*/
template<typename Block_>
bool filter(Float_ x, Block_ b) const {
return x >= my_lower[b] && x <= my_upper[b]; // NaNs don't compare true in any comparison so they get auto-discarded here.
return my_thresholds[b].filter(x);
}

/**
Expand Down
22 changes: 18 additions & 4 deletions tests/src/choose_filter_thresholds.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,10 @@ TEST(ChooseFilterThresholds, Blocked) {
scran::choose_filter_thresholds::Options opt;
scran::choose_filter_thresholds::BlockThresholds<double> thresholds(mms, opt);

EXPECT_DOUBLE_EQ(thresholds.get_lower()[0], 0.4);
EXPECT_DOUBLE_EQ(thresholds.get_upper()[0], 1.6);
EXPECT_DOUBLE_EQ(thresholds.get_lower()[1], 1.7);
EXPECT_DOUBLE_EQ(thresholds.get_upper()[1], 2.3);
EXPECT_DOUBLE_EQ(thresholds.get_thresholds()[0].get_lower(), 0.4);
EXPECT_DOUBLE_EQ(thresholds.get_thresholds()[0].get_upper(), 1.6);
EXPECT_DOUBLE_EQ(thresholds.get_thresholds()[1].get_lower(), 1.7);
EXPECT_DOUBLE_EQ(thresholds.get_thresholds()[1].get_upper(), 2.3);

std::vector<double> metrics { 0, 0.5, 1.0, 1.5, 1.8, 2.1, 3.0, std::numeric_limits<double>::quiet_NaN() };

Expand Down Expand Up @@ -190,3 +190,17 @@ TEST(ChooseFilterThresholds, Overwrite) {
EXPECT_EQ(buffer, expected);
}
}

TEST(ChooseFilterThresholds, Constructors) {
scran::choose_filter_thresholds::Thresholds<double> def;
def = scran::choose_filter_thresholds::Thresholds<double>(1, 2);
EXPECT_EQ(def.get_lower(), 1);
EXPECT_EQ(def.get_upper(), 2);

scran::choose_filter_thresholds::BlockThresholds<double> bdef;
bdef = scran::choose_filter_thresholds::BlockThresholds<double>({ def });
EXPECT_EQ(bdef.get_thresholds().size(), 1);
EXPECT_EQ(bdef.get_thresholds().front().get_lower(), 1);
EXPECT_EQ(bdef.get_thresholds().front().get_upper(), 2);
}

0 comments on commit cb2c2f7

Please sign in to comment.