Migrate changes resulting from other biocpy packages, cleaning up docstrings and bump package versions. (#74)
libscran · Jan 4, 2024 · 61d3620 · 61d3620
1 parent 83a5fc7
commit 61d3620
Show file tree

Hide file tree

Showing 41 changed files with 565 additions and 379 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -167,8 +167,17 @@
 # If this is True, todo emits a warning for each TODO entry. The default is False.
 todo_emit_warnings = True
 
-napoleon_use_rtype = True  # having a separate entry generally helps readability
-napoleon_use_param = True
+# napoleon_use_rtype = True  # having a separate entry generally helps readability
+# napoleon_use_param = True
+
+autodoc_default_options = {
+    "special-members": True,
+    "undoc-members": False,
+    "exclude-members": "__weakref__, __dict__, __str__, __module__, __init__",
+}
+
+autosummary_generate = True
+autosummary_imported_members = True
 
 
 # -- Options for HTML output -------------------------------------------------

diff --git a/setup.cfg b/setup.cfg
@@ -41,22 +41,22 @@ package_dir =
     =src
 
 # Require a min/specific Python version (comma-separated conditions)
-# python_requires = >=3.8
+python_requires = >=3.8
 
 # Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0.
 # Version specifiers like >=2.2,<3.0 avoid problems due to API changes in
 # new major versions. This works if the required packages follow Semantic Versioning.
 # For more information, check out https://semver.org/.
 install_requires =
     importlib-metadata; python_version<"3.8"
-    mattress>=0.0.7
-    assorthead>=0.0.10
+    mattress>=0.1.4
+    assorthead>=0.0.11
     numpy
-    BiocFrame>=0.3.1
+    biocframe>=0.5.0
     biocutils
     igraph
-    singlecellexperiment>=0.3.0
-    summarizedexperiment
+    singlecellexperiment>=0.4.0
+    summarizedexperiment>=0.4.0
     delayedarray
 
 [options.packages.find]

diff --git a/src/scranpy/_utils.py b/src/scranpy/_utils.py
@@ -1,7 +1,8 @@
-from typing import Sequence, Callable, Any, Tuple, Union
-from biocutils import factor, match
+from typing import Sequence, Tuple, Union
+
+from biocutils import Factor, match
 from mattress import TatamiNumericPointer, tatamize
-from numpy import bool_, int32, int_, ndarray, uint8, uintp, zeros, array
+import numpy as np
 from summarizedexperiment import SummarizedExperiment
 
 __author__ = "ltla, jkanche"
@@ -12,25 +13,25 @@
 MatrixTypes = Union[TatamiNumericPointer, SummarizedExperiment]
 
 
-def factorize(x: Sequence) -> Tuple[list, ndarray]:
-    lev, ind = factor(x)
-    return lev, array(ind, int32)
+def factorize(x: Sequence) -> Tuple[list, np.ndarray]:
+    _factor = Factor.from_sequence(x, sort_levels=False)
+    return _factor.levels, np.array(_factor.codes, np.int32)
 
 
-def to_logical(selection: Sequence, length: int, dtype=uint8) -> ndarray:
-    output = zeros((length,), dtype=dtype)
+def to_logical(selection: Sequence, length: int, dtype=np.uint8) -> np.ndarray:
+    output = np.zeros((length,), dtype=dtype)
 
     if isinstance(selection, range) or isinstance(selection, slice):
         output[selection] = 1
         return output
 
-    if isinstance(selection, ndarray):
-        if selection.dtype == bool_:
+    if isinstance(selection, np.ndarray):
+        if selection.dtype == np.bool_:
             if len(selection) != length:
                 raise ValueError("length of 'selection' is not equal to 'length'.")
             output[selection] = 1
             return output
-        elif selection.dtype == int_:
+        elif selection.dtype == np.int_:
             output[selection] = 1
             return output
         else:
@@ -73,14 +74,17 @@ def match_lists(x, y):
 
 
 def tatamize_input(x: MatrixTypes, assay_type: Union[str, int]) -> TatamiNumericPointer:
-    if isinstance(x, SummarizedExperiment):
+    if isinstance(x, TatamiNumericPointer):
+        return x
+
+    if issubclass(type(x), SummarizedExperiment):
         x = x.assay(assay_type)
     return tatamize(x)
 
 
 def create_pointer_array(arrs):
     num = len(arrs)
-    output = ndarray((num,), dtype=uintp)
+    output = np.ndarray((num,), dtype=np.uintp)
 
     if isinstance(arrs, list):
         for i in range(num):

diff --git a/src/scranpy/aggregation/aggregate_across_cells.py b/src/scranpy/aggregation/aggregate_across_cells.py
@@ -189,6 +189,6 @@ def aggregate_across_cells(
     for i, x in enumerate(levels):
         reported_factors[factor_names[i]] = x
     reported_factors["counts"] = counts
-    output.col_data = BiocFrame(reported_factors)
+    output.column_data = BiocFrame(reported_factors)
 
     return output
diff --git a/src/scranpy/analyze/AnalyzeOptions.py b/src/scranpy/analyze/AnalyzeOptions.py
@@ -1,53 +1,54 @@
 from dataclasses import dataclass, field
 from typing import Optional, Sequence
 
+from .. import batch_correction as correct
 from .. import clustering as clust
 from .. import dimensionality_reduction as dimred
 from .. import feature_selection as feat
 from .. import marker_detection as mark
 from .. import nearest_neighbors as nn
 from .. import normalization as norm
 from .. import quality_control as qc
-from .. import batch_correction as correct
 
 
 @dataclass
 class MiscellaneousOptions:
     """Miscellaneous options for :py:meth:`~scranpy.analyze.analyze.analyze`.
 
     Attributes:
-        cell_names (Sequence[str], optional):
+        cell_names:
             Names for all cells in the dataset, to be added to any per-cell data frames.
             This should have the same length as the number of columns in each data matrix.
 
-        rna_feature_names (Sequence[str], optional):
+        rna_feature_names:
             Names for all features in the RNA data.
             This should have the same length as the number of rows in the RNA count matrix.
 
-        adt_feature_names (Sequence[str], optional):
+        adt_feature_names:
             Names for all tags in the ADT data.
             This should have the same length as the number of rows in the ADT count matrix.
 
-        crispr_feature_names (Sequence[str], optional):
+        crispr_feature_names:
             Names for all guides in the CRISPR data.
             This should have the same length as the number of rows in the CRISPR count matrix.
 
-        filter_on_rna_qc (bool):
+        filter_on_rna_qc:
             Whether to filter cells on the RNA-based quality control metrics,
             when RNA data is available.
 
-        filter_on_adt_qc (bool):
+        filter_on_adt_qc:
             Whether to filter cells on the ADT-based quality control metrics,
             when ADT data is available.
 
-        filter_on_crispr_qc (bool):
+        filter_on_crispr_qc:
             Whether to filter cells on the CRISPR-based quality control metrics,
             when CRISPR data is available.
 
-        snn_graph_multilevel_resolution (float):
+        snn_graph_multilevel_resolution:
             Resolution to use for multi-level clustering of the SNN graph.
 
-        block (Sequence, optional): Block assignment for each cell.
+        block:
+            Block assignment for each cell.
             This should have length equal to the total number of cells in the dataset, before any quality control
             is applied.
     """
@@ -74,104 +75,104 @@ class AnalyzeOptions:
     check out the setter methods of this class for more details.
 
     Attributes:
-        per_cell_rna_qc_metrics_options (PerCellRnaQcMetricsOptions):
+        per_cell_rna_qc_metrics_options:
             Options to pass to :py:meth:`~scranpy.quality_control.per_cell_rna_qc_metrics.per_cell_rna_qc_metrics`.
 
-        suggest_rna_qc_filters_options (SuggestRnaQcFiltersOptions):
+        suggest_rna_qc_filters_options:
             Options to pass to :py:meth:`~scranpy.quality_control.suggest_rna_qc_filters.suggest_rna_qc_filters`.
 
-        create_rna_qc_filter_options (CreateRnaQcFilterOptions):
+        create_rna_qc_filter_options:
             Options to pass to :py:meth:`~scranpy.quality_control.create_rna_qc_filter.create_rna_qc_filter`.
 
-        per_cell_adt_qc_metrics_options (PerCellRnaQcMetricsOptions):
+        per_cell_adt_qc_metrics_options:
             Options to pass to :py:meth:`~scranpy.quality_control.per_cell_adt_qc_metrics.per_cell_adt_qc_metrics`.
 
-        suggest_adt_qc_filters_options (SuggestRnaQcFiltersOptions):
+        suggest_adt_qc_filters_options:
             Options to pass to :py:meth:`~scranpy.quality_control.suggest_adt_qc_filters.suggest_adt_qc_filters`.
 
-        create_adt_qc_filter_options (CreateRnaQcFilterOptions):
+        create_adt_qc_filter_options:
             Options to pass to :py:meth:`~scranpy.quality_control.create_adt_qc_filter.create_adt_qc_filter`.
 
-        per_cell_crispr_qc_metrics_options (PerCellRnaQcMetricsOptions):
+        per_cell_crispr_qc_metrics_options:
             Options to pass to
             :py:meth:`~scranpy.quality_control.per_cell_crispr_qc_metrics.per_cell_crispr_qc_metrics`.
 
-        suggest_crispr_qc_filters_options (SuggestRnaQcFiltersOptions):
+        suggest_crispr_qc_filters_options:
             Options to pass to :py:meth:`~scranpy.quality_control.suggest_crispr_qc_filters.suggest_crispr_qc_filters`.
 
-        create_crispr_qc_filter_options (CreateRnaQcFilterOptions):
+        create_crispr_qc_filter_options:
             Options to pass to :py:meth:`~scranpy.quality_control.create_crispr_qc_filter.create_crispr_qc_filter`.
 
-        filter_cells_options (FilterCellsOptions):
+        filter_cells_options:
             Options to pass to :py:meth:`~scranpy.quality_control.filter_cells.filter_cells`.
 
-        rna_log_norm_counts_options (LogNormCountsOptions):
+        rna_log_norm_counts_options:
             Options to pass to :py:meth:`~scranpy.normalization.log_norm_counts.log_norm_counts`
             for the RNA count matrix.
 
-        grouped_size_factors_options (GroupedSizeFactorsOptions):
+        grouped_size_factors_options:
             Options to pass to :py:meth:`~scranpy.normalization.grouped_size_factors.grouped_size_factors`
             to compute ADT size factors.
 
-        adt_log_norm_counts_options (LogNormCountsOptions):
+        adt_log_norm_counts_options:
             Options to pass to :py:meth:`~scranpy.normalization.log_norm_counts.log_norm_counts`
             for the ADT count matrix.
 
-        crispr_log_norm_counts_options (LogNormCountsOptions):
+        crispr_log_norm_counts_options:
             Options to pass to :py:meth:`~scranpy.normalization.log_norm_counts.log_norm_counts`
             for the CRISPR count matrix.
 
-        choose_hvgs_options (ChooseHvgsOptions):
+        choose_hvgs_options:
             Options to pass to :py:meth:`~scranpy.feature_selection.choose_hvgs.choose_hvgs`
             to choose highly variable genes for the RNA data.
 
-        model_gene_variances_options (ModelGeneVariancesOptions):
+        model_gene_variances_options:
             Options to pass to :py:meth:`~scranpy.feature_selection.model_gene_variances.model_gene_variances`
             to model per-gene variances for the RNA data.
 
-        rna_run_pca_options (RunPcaOptions):
+        rna_run_pca_options:
             Options to pass to :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca`
             for the RNA log-expression matrix.
 
-        adt_run_pca_options (RunPcaOptions):
+        adt_run_pca_options:
             Options to pass to :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca`
             for the ADT log-expression matrix.
 
-        crispr_run_pca_options (RunPcaOptions):
+        crispr_run_pca_options:
             Options to pass to :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca`
             for the CRISPR log-expression matrix.
 
-        mnn_correct_options (MnnCorrectOptions):
+        mnn_correct_options:
             Options to pass to :py:meth:`~scranpy.batch_correction.mnn_correct.mnn_correct`.
 
-        build_neighbor_index_options (BuildNeighborIndexOptions):
+        build_neighbor_index_options:
             Options to pass to :py:meth:`~scranpy.nearest_neighbors.build_neighbor_index.build_neighbor_index`.
 
-        find_nearest_neighbors_options (FindNearestNeighborsOptions):
+        find_nearest_neighbors_options:
             Options to pass to :py:meth:`~scranpy.nearest_neighbors.find_nearest_neighbors.find_nearest_neighbors`.
 
-        run_tsne_options (RunTsneOptions):
+        run_tsne_options:
             Options to pass to :py:meth:`~scranpy.dimensionality_reduction.run_tsne.run_tsne`.
 
-        run_umap_options (RunUmapOptions):
+        run_umap_options:
             Options to pass to :py:meth:`~scranpy.dimensionality_reduction.run_umap.run_umap`.
 
-        build_snn_graph_options (BuildSnnGraphOptions):
+        build_snn_graph_options:
             Options to pass to :py:meth:`~scranpy.clustering.build_snn_graph.build_snn_graph`.
 
-        rna_score_markers_options (ScoreMarkersOptions):
+        rna_score_markers_options:
             Options to pass to :py:meth:`~scranpy.marker_detection.score_markers.score_markers`
             for the RNA log-expression values.
 
-        adt_score_markers_options (ScoreMarkersOptions):
+        adt_score_markers_options:
             Options to pass to :py:meth:`~scranpy.marker_detection.score_markers.score_markers`
             for the ADT log-abundances.
 
-        crispr_score_markers_options (ScoreMarkersOptions):
+        crispr_score_markers_options:
             Options to pass to :py:meth:`~scranpy.marker_detection.score_markers.score_markers`
             for the CRISPR log-abundances.
 
-        miscellaneous_options (MiscellaneousOptions):
+        miscellaneous_options:
             Further options that are not associated with any single function call.
     """
 
@@ -303,7 +304,7 @@ def set_seed(self, seed: int = 42):
         :py:meth:`~scranpy.dimensionality_reduction.run_umap.run_umap`.
 
         Args:
-            seed (int, optional):
+            seed:
                 Seed for random number generation.
         """
         self.run_pca_options.set_seed(seed)
@@ -328,7 +329,8 @@ def set_threads(self, num_threads: int = 1):
         In all cases, thread utilization will not exceed the limit specified here in ``num_threads``.
 
         Args:
-            num_threads (int, optional): Number of threads. Defaults to 1.
+            num_threads:
+                Number of threads. Defaults to 1.
         """
         self.per_cell_rna_qc_metrics_options.set_threads(num_threads)
         self.log_norm_counts_options.set_threads(num_threads)