Updates for the latest version of the tatami library. (#271)

Require the minimum version for the latest beachmat.
SingleR-inc · May 22, 2024 · 84a6e55 · 84a6e55
1 parent 013d7f4
commit 84a6e55
Show file tree

Hide file tree

Showing 13 changed files with 105 additions and 94 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: SingleR
 Title: Reference-Based Single-Cell RNA-Seq Annotation
-Version: 2.7.0
-Date: 2024-02-13
+Version: 2.7.1
+Date: 2024-05-22
 Authors@R: c(person("Dvir", "Aran", email="[email protected]", role=c("aut", "cph")),
     person("Aaron", "Lun", email="[email protected]", role=c("ctb", "cre")),
     person("Daniel", "Bunis", role="ctb"),
@@ -23,7 +23,7 @@ Imports:
     stats,
     utils,
     Rcpp,
-    beachmat,
+    beachmat (>= 2.21.1),
     parallel
 LinkingTo:
     Rcpp,

diff --git a/R/getClassicMarkers.R b/R/getClassicMarkers.R
@@ -78,7 +78,6 @@ getClassicMarkers <- function(ref, labels, assay.type="logcounts", check.missing
 
         flabels <- factor(labels[[i]])
         gm <- grouped_medians(curptr, as.integer(flabels) - 1L, nlevels(flabels), nthreads = num.threads)
-        gm <- t(gm)
         colnames(gm) <- levels(flabels)
         ref[[i]] <- gm
     }

diff --git a/inst/include/singlepp/BasicBuilder.hpp b/inst/include/singlepp/BasicBuilder.hpp
@@ -36,7 +36,7 @@ class BasicBuilder {
         /**
          * See `set_top()` for details.
          */
-        static constexpr int top = 20;
+        static constexpr int top = -1;
 
         /**
          * See `set_approximate()` for details.
@@ -204,13 +204,13 @@ class BasicBuilder {
 
         /**
          * Row indices of test dataset, specifying the features in the intersection.
-         * This has the same length as `ref_indices`.
+         * This has the same length as `ref_subset`, where corresponding entries refer to the same features in the respective datasets.
          */
         std::vector<int> mat_subset;
 
         /**
          * Row indices of reference dataset, specifying the features in the intersection.
-         * This has the same length as `mat_indices`.
+         * This has the same length as `mat_subset`, where corresponding entries refer to the same features in the respective datasets.
          */
         std::vector<int> ref_subset;
 
@@ -221,6 +221,17 @@ class BasicBuilder {
             return references.size();
         }
 
+        /**
+         * @return Number of profiles in this reference.
+         */
+        size_t num_profiles() const {
+            size_t n = 0;
+            for (const auto& ref : references) {
+                n += ref.ranked.size();
+            }
+            return n;
+        }
+
         /**
          * @cond
          */

diff --git a/inst/include/singlepp/BasicScorer.hpp b/inst/include/singlepp/BasicScorer.hpp
@@ -116,15 +116,13 @@ class BasicScorer {
      * containing the index of the row of `mat` corresponding to each gene in `built.subset`.
      * That is, row `mat_subset[i]` in `mat` should be the same gene as row `built.subset[i]` in the reference matrix.
      * @param[out] best Pointer to an array of length equal to the number of columns in `mat`.
-     * This is filled with the index of the assigned label for each cell.
+     * On output, this is filled with the index of the assigned label for each cell.
      * @param[out] scores Vector of pointers to arrays of length equal to the number of columns in `mat`.
-     * This is filled with the (non-fine-tuned) score for each label for each cell.
-     * Any pointer may be `NULL` in which case the scores for that label will not be saved.
+     * On output, this is filled with the (non-fine-tuned) score for each label for each cell.
+     * Any pointer may be `NULL` in which case the scores for that label will not be reported.
      * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`.
-     * This is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
+     * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
      * This may also be `NULL` in which case the deltas are not reported.
-     *
-     * @return `best`, `scores` and `delta` are filled with their output values.
      */
     void run(const tatami::Matrix<double, int>* mat, const BasicBuilder::Prebuilt& built, const int* mat_subset, int* best, std::vector<double*>& scores, double* delta) const {
         annotate_cells_simple(
@@ -149,15 +147,13 @@ class BasicScorer {
      * This should have the same order and identity of genes as the reference matrix used to create `built`.
      * @param built An object produced by `BasicBuilder::build()`.
      * @param[out] best Pointer to an array of length equal to the number of columns in `mat`.
-     * This is filled with the index of the assigned label for each cell.
+     * On output, this is filled with the index of the assigned label for each cell.
      * @param[out] scores Vector of pointers to arrays of length equal to the number of columns in `mat`.
-     * This is filled with the (non-fine-tuned) score for each label for each cell.
-     * Any pointer may be `NULL` in which case the scores for that label will not be saved.
+     * On output, this is filled with the (non-fine-tuned) score for each label for each cell.
+     * Any pointer may be `NULL` in which case the scores for that label will not be reported.
      * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`.
-     * This is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
+     * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
      * This may also be `NULL` in which case the deltas are not reported.
-     *
-     * @return `best`, `scores` and `delta` are filled with their output values.
      */
     void run(const tatami::Matrix<double, int>* mat, const BasicBuilder::Prebuilt& built, int* best, std::vector<double*>& scores, double* delta) const {
         run(mat, built, built.subset.data(), best, scores, delta);
@@ -243,15 +239,13 @@ class BasicScorer {
      * @param mat Expression matrix of the test dataset, where rows are genes and columns are cells.
      * @param built An object produced by `build()` with intersections.
      * @param[out] best Pointer to an array of length equal to the number of columns in `mat`.
-     * This is filled with the index of the assigned label for each cell.
+     * On output, this is filled with the index of the assigned label for each cell.
      * @param[out] scores Vector of pointers to arrays of length equal to the number of columns in `mat`.
-     * This is filled with the (non-fine-tuned) score for each label for each cell.
-     * Any pointer may be `NULL` in which case the scores for that label will not be saved.
+     * On output, this is filled with the (non-fine-tuned) score for each label for each cell.
+     * Any pointer may be `NULL` in which case the scores for that label will not be reported.
      * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`.
-     * This is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
+     * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
      * This may also be `NULL` in which case the deltas are not reported.
-     * 
-     * @return `best`, `scores` and `delta` are filled with their output values.
      */
     void run(
         const tatami::Matrix<double, int>* mat, 

diff --git a/inst/include/singlepp/ChooseClassicMarkers.hpp b/inst/include/singlepp/ChooseClassicMarkers.hpp
@@ -69,7 +69,7 @@ class ChooseClassicMarkers {
      *
      * @return An appropriate number of markers for each pairwise comparison.
      *
-     * The exact expression is defined as $500 (\frac{2}{3})^{\log_2{N}}$ for $N$ labels,
+     * The exact expression is defined as \f$500 (\frac{2}{3})^{\log_2{N}}\f$ for \f$N\f$ labels,
      * which steadily decreases the markers per comparison as the number of labels increases.
      * This aims to avoid an excessive number of features when dealing with references with many labels.
      */
@@ -165,7 +165,7 @@ class ChooseClassicMarkers {
 
         int actual_number = number;
         if (number < 0) {
-            actual_number = std::round(500.0 * std::pow(2.0/3.0, std::log(static_cast<double>(nlabels)) / std::log(2.0)));
+            actual_number = number_of_markers(nlabels);
         } 
         if (actual_number > static_cast<int>(ngenes)) {
             actual_number = ngenes;
@@ -182,7 +182,7 @@ class ChooseClassicMarkers {
             typedef typename  Matrix::value_type Value_;
             typedef typename  Matrix::index_type Index_;
             std::vector<Value_> rbuffer(ngenes), lbuffer(ngenes);
-            std::vector<std::shared_ptr<tatami::FullDenseExtractor<Value_, Index_> > > rworks(nrefs), lworks(nrefs);
+            std::vector<std::shared_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > > rworks(nrefs), lworks(nrefs);
 
 #ifndef SINGLEPP_CUSTOM_PARALLEL
             #pragma omp for

diff --git a/inst/include/singlepp/Classifier.hpp b/inst/include/singlepp/Classifier.hpp
@@ -209,16 +209,14 @@ class Classifier {
      * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
      * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels, see `Markers` for more details.
      * @param[out] best Pointer to an array of length equal to the number of columns in `mat`.
-     * This is filled with the index of the assigned label for each cell.
+     * On output, this is filled with the index of the assigned label for each cell.
      * @param[out] scores Vector of pointers of length equal to the number of labels.
      * Each pointer should point to an array of length equal to the number of columns in `mat`.
-     * This is filled with the (non-fine-tuned) score for that label for each cell.
-     * Any pointer may be `NULL` in which case the scores for that label will not be saved.
+     * On output, this is filled with the (non-fine-tuned) score for that label for each cell.
+     * Any pointer may be `NULL` in which case the scores for that label will not be reported.
      * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`.
-     * This is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
+     * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
      * This may also be `NULL` in which case the deltas are not reported.
-     *
-     * @return `best`, `scores` and `delta` are filled with their output values.
      */
     void run(const tatami::Matrix<double, int>* mat, const tatami::Matrix<double, int>* ref, const int* labels, Markers markers, int* best, std::vector<double*>& scores, double* delta) const {
         auto prebuilt = build_reference(ref, labels, std::move(markers));
@@ -256,17 +254,15 @@ class Classifier {
      * The smallest label should be 0 and the largest label should be equal to the total number of unique labels minus 1.
      * @param markers A vector of vectors of ranked marker genes for each pairwise comparison between labels, see `Markers` for more details.
      * @param[out] best Pointer to an array of length equal to the number of columns in `mat`.
-     * This is filled with the index of the assigned label for each cell.
+     * On output, this is filled with the index of the assigned label for each cell.
      * @param[out] scores Vector of pointers of length equal to the number of labels.
      * Each pointer should point to an array of length equal to the number of columns in `mat`.
-     * This is filled with the (non-fine-tuned) score for that label for each cell.
-     * Any pointer may be `NULL` in which case the scores for that label will not be saved.
+     * On output, this is filled with the (non-fine-tuned) score for that label for each cell.
+     * Any pointer may be `NULL` in which case the scores for that label will not be reported.
      * @param[out] delta Pointer to an array of length equal to the number of columns in `mat`.
-     * This is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
+     * On output, this is filled with the difference between the highest and second-highest scores, possibly after fine-tuning.
      * This may also be `NULL` in which case the deltas are not reported.
      * 
-     * @return `best`, `scores` and `delta` are filled with their output values.
-     * 
      * This version of `run()` applies an intersection to find the common genes between `mat` and `ref`, based on their shared values in `mat_id` and `ref_id`.
      * The annotation is then performed using only the subset of common genes.
      * The aim is to easily accommodate differences in feature annotation between the test and reference profiles.