Improved the docstrings.

libscran · Jul 21, 2024 · fd9854f · fd9854f
1 parent 53ee6c7
commit fd9854f
Show file tree

Hide file tree

Showing 3 changed files with 17 additions and 13 deletions.
diff --git a/include/scran_variances/choose_highly_variable_genes.hpp b/include/scran_variances/choose_highly_variable_genes.hpp
@@ -20,6 +20,8 @@ struct ChooseHighlyVariableGenesOptions {
     /**
      * Number of top genes to choose.
      * This should be positive.
+     * The actual number of chosen genes may be smaller than `top` if the dataset contains fewer genes,
+     * or larger if `ChooseHighlyVariableGenesOptions::keep_ties = true`.
      */
     size_t top = 4000;
 

diff --git a/include/scran_variances/fit_variance_trend.hpp b/include/scran_variances/fit_variance_trend.hpp
@@ -43,23 +43,24 @@ struct FitVarianceTrendOptions {
     double span = 0.3;
 
     /**
-     * Should a fixed-width constraint be applied to the LOWESS smoother?
-     * This forces each window to be a minimum width (see `FitVarianceTrendOptions::minimum_width`) and avoids problems with large differences in density.
+     * Should a minimum width constraint be applied to the LOWESS smoother?
+     * This forces each window to be a minimum width (see `FitVarianceTrendOptions::minimum_width`)
+     * to avoid overfitting from very small windows in high-density intervals.
      * For example, the default smoother performs poorly at high abundances where there are few genes.
      */
     bool use_minimum_width = false;
 
     /**
-     * Width of the window to use when `FitVarianceTrendOptions::use_minimum_width = true`.
-     * This should be relative to the range of `mean` values in `fit_variance_trend()`;
+     * Minimum width of the window to use when `FitVarianceTrendOptions::use_minimum_width = true`.
+     * This should be appropriate for the range of `mean` values used in `fit_variance_trend()`;
      * the default value is chosen based on the typical range in single-cell RNA-seq data.
      */
     double minimum_width = 1;
 
     /**
      * Minimum number of observations in each window when `FitVarianceTrendOptions::use_minimum_width = true`.
-     * This ensures that each window contains at least a given number of observations;
-     * if it does not, it is extended using the standard LOWESS logic until the minimum number is achieved.
+     * This ensures that each window contains at least a given number of observations for a good fit.
+     * If the minimum-width window contains fewer observations, it is extended using the standard LOWESS logic until the minimum number is achieved.
      */
     int minimum_window_count = 200;
 

diff --git a/include/scran_variances/model_gene_variances.hpp b/include/scran_variances/model_gene_variances.hpp
@@ -28,14 +28,14 @@ struct ModelGeneVariancesOptions {
     FitVarianceTrendOptions fit_variance_trend_options;
 
     /**
-     * Weighting policy to use for averaging statistics across blocks.
-     * Only relevant for `model_gene_variances_blocked()` overloads where averaged outputs are requested.
+     * Policy to use for weighting the contribution from each block when computing the average for each statistic.
+     * Only relevant to `model_gene_variances_blocked()` overloads where averaged outputs are requested.
      */
     scran_blocks::WeightPolicy block_weight_policy = scran_blocks::WeightPolicy::VARIABLE;
 
     /**
      * Parameters for the variable block weights.
-     * Only relevant for `model_gene_variances_blocked()` overloads where averaged outputs are requested
+     * Only relevant to `model_gene_variances_blocked()` overloads where averaged outputs are requested
      * and `ModelGeneVariancesOptions::block_weight_policy = scran_blocks::WeightPolicy::VARIABLE`.
      */
     scran_blocks::VariableWeightParameters variable_block_weight_parameters; 
@@ -57,7 +57,7 @@ struct ModelGeneVariancesOptions {
  * @tparam Stat_ Floating-point type for the output statistics.
  *
  * In general, the pointers in this class should _not_ be set to `NULL`.
- * The only exception is for `ModelGeneVariancesBlockedBuffers::average`,
+ * The only exception is for instances of this class that are used as `ModelGeneVariancesBlockedBuffers::average`,
  * where setting the pointer to `NULL` will omit calculation of the corresponding average statistic.
  */
 template<typename Stat_>
@@ -437,7 +437,8 @@ void compute_average(
 
 /** 
  * Compute and model the per-feature variances from a log-expression matrix with blocking.
- * The mean and variance of each gene is computed separately for all cells in each block, and a separate trend is fitted to each block to obtain residuals (see `model_gene_variances()`).
+ * The mean and variance of each gene are computed separately for all cells in each block,
+ * and a separate trend is fitted to each block to obtain residuals (see `model_gene_variances()`).
  * This ensures that sample and batch effects do not confound the variance estimates.
  *
  * We also compute the average of each statistic across blocks, using the weighting strategy specified in `ModelGeneVariancesOptions::block_weight_policy`.
@@ -522,10 +523,10 @@ void model_gene_variances_blocked(
 
 /** 
  * Here, we scan through a log-transformed normalized expression matrix and compute per-gene means and variances.
- * We then fits a trend to the variances with respect to the means using `fit_variance_trend()`.
+ * We then fit a trend to the variances with respect to the means using `fit_variance_trend()`.
  * We assume that most genes at any given abundance are not highly variable, such that the fitted value of the trend is interpreted as the "uninteresting" variance - 
  * this is mostly attributed to technical variation like sequencing noise, but can also represent constitutive biological noise like transcriptional bursting.
- * Under this assumption, the residual can be treated as a quantification of biologically interesting variation, and can be used to identify relevant features for downstream analyses.
+ * Under this assumption, the residual can be treated as a measure of biologically interesting variation, and can be used to identify relevant features for downstream analyses.
  *
  * @tparam Value_ Data type of the matrix.
  * @tparam Index_ Integer type for the row/column indices.