diff --git a/docs/conf.py b/docs/conf.py index 6b7399f..2c82dea 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -167,8 +167,17 @@ # If this is True, todo emits a warning for each TODO entries. The default is False. todo_emit_warnings = True -napoleon_use_rtype = True # having a separate entry generally helps readability -napoleon_use_param = True +# napoleon_use_rtype = True # having a separate entry generally helps readability +# napoleon_use_param = True + +autodoc_default_options = { + "special-members": True, + "undoc-members": False, + "exclude-members": "__weakref__, __dict__, __str__, __module__, __init__", +} + +autosummary_generate = True +autosummary_imported_members = True # -- Options for HTML output ------------------------------------------------- diff --git a/setup.cfg b/setup.cfg index 1b1c56a..076ae5e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,7 +41,7 @@ package_dir = =src # Require a min/specific Python version (comma-separated conditions) -# python_requires = >=3.8 +python_requires = >=3.8 # Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0. # Version specifiers like >=2.2,<3.0 avoid problems due to API changes in @@ -49,14 +49,14 @@ package_dir = # For more information, check out https://semver.org/. install_requires = importlib-metadata; python_version<"3.8" - mattress>=0.0.7 - assorthead>=0.0.10 + mattress>=0.1.4 + assorthead>=0.0.11 numpy - BiocFrame>=0.3.1 + biocframe>=0.5.0 biocutils igraph - singlecellexperiment>=0.3.0 - summarizedexperiment + singlecellexperiment>=0.4.0 + summarizedexperiment>=0.4.0 delayedarray [options.packages.find] diff --git a/src/scranpy/_utils.py b/src/scranpy/_utils.py index 55e5497..9284c3e 100644 --- a/src/scranpy/_utils.py +++ b/src/scranpy/_utils.py @@ -1,7 +1,8 @@ -from typing import Sequence, Callable, Any, Tuple, Union -from biocutils import factor, match +from typing import Sequence, Tuple, Union + +from biocutils import Factor, match from mattress import TatamiNumericPointer, tatamize -from numpy import bool_, int32, int_, ndarray, uint8, uintp, zeros, array +import numpy as np from summarizedexperiment import SummarizedExperiment __author__ = "ltla, jkanche" @@ -12,25 +13,25 @@ MatrixTypes = Union[TatamiNumericPointer, SummarizedExperiment] -def factorize(x: Sequence) -> Tuple[list, ndarray]: - lev, ind = factor(x) - return lev, array(ind, int32) +def factorize(x: Sequence) -> Tuple[list, np.ndarray]: + _factor = Factor.from_sequence(x, sort_levels=False) + return _factor.levels, np.array(_factor.codes, np.int32) -def to_logical(selection: Sequence, length: int, dtype=uint8) -> ndarray: - output = zeros((length,), dtype=dtype) +def to_logical(selection: Sequence, length: int, dtype=np.uint8) -> np.ndarray: + output = np.zeros((length,), dtype=dtype) if isinstance(selection, range) or isinstance(selection, slice): output[selection] = 1 return output - if isinstance(selection, ndarray): - if selection.dtype == bool_: + if isinstance(selection, np.ndarray): + if selection.dtype == np.bool_: if len(selection) != length: raise ValueError("length of 'selection' is not equal to 'length'.") output[selection] = 1 return output - elif selection.dtype == int_: + elif selection.dtype == np.int_: output[selection] = 1 return output else: @@ -73,14 +74,17 @@ def match_lists(x, y): def tatamize_input(x: MatrixTypes, assay_type: Union[str, int]) -> TatamiNumericPointer: - if isinstance(x, SummarizedExperiment): + if isinstance(x, TatamiNumericPointer): + return x + + if issubclass(type(x), SummarizedExperiment): 
x = x.assay(assay_type) return tatamize(x) def create_pointer_array(arrs): num = len(arrs) - output = ndarray((num,), dtype=uintp) + output = np.ndarray((num,), dtype=np.uintp) if isinstance(arrs, list): for i in range(num): diff --git a/src/scranpy/aggregation/aggregate_across_cells.py b/src/scranpy/aggregation/aggregate_across_cells.py index 0f9c277..e670da6 100644 --- a/src/scranpy/aggregation/aggregate_across_cells.py +++ b/src/scranpy/aggregation/aggregate_across_cells.py @@ -189,6 +189,6 @@ def aggregate_across_cells( for i, x in enumerate(levels): reported_factors[factor_names[i]] = x reported_factors["counts"] = counts - output.col_data = BiocFrame(reported_factors) + output.column_data = BiocFrame(reported_factors) return output diff --git a/src/scranpy/analyze/AnalyzeOptions.py b/src/scranpy/analyze/AnalyzeOptions.py index 654f069..5342066 100644 --- a/src/scranpy/analyze/AnalyzeOptions.py +++ b/src/scranpy/analyze/AnalyzeOptions.py @@ -1,6 +1,7 @@ from dataclasses import dataclass, field from typing import Optional, Sequence +from .. import batch_correction as correct from .. import clustering as clust from .. import dimensionality_reduction as dimred from .. import feature_selection as feat @@ -8,7 +9,6 @@ from .. import nearest_neighbors as nn from .. import normalization as norm from .. import quality_control as qc -from .. import batch_correction as correct @dataclass @@ -16,38 +16,39 @@ class MiscellaneousOptions: """Miscellaneous options for :py:meth:`~scranpy.analyze.analyze.analyze`. Attributes: - cell_names (Sequence[str], optional): + cell_names: Names for all cells in the dataset, to be added to any per-cell data frames. This should have the same length as the number of columns in each data matrix. - rna_feature_names (Sequence[str], optional): + rna_feature_names: Names for all features in the RNA data. This should have the same length as the number of rows in the RNA count matrix. - adt_feature_names (Sequence[str], optional): + adt_feature_names: Names for all tags in the ADT data. This should have the same length as the number of rows in the ADT count matrix. - crispr_feature_names (Sequence[str], optional): + crispr_feature_names: Names for all guides in the CRISPR data. This should have the same length as the number of rows in the CRISPR count matrix. - filter_on_rna_qc (bool): + filter_on_rna_qc: Whether to filter cells on the RNA-based quality control metrics, when RNA data is available. - filter_on_adt_qc (bool): + filter_on_adt_qc: Whether to filter cells on the ADT-based quality control metrics, when ADT data is available. - filter_on_crispr_qc (bool): + filter_on_crispr_qc: Whether to filter cells on the CRISPR-based quality control metrics, when CRISPR data is available. - snn_graph_multilevel_resolution (float): + snn_graph_multilevel_resolution: Resolution to use for multi-level clustering of the SNN graph. - block (Sequence, optional): Block assignment for each cell. + block: + Block assignment for each cell. This should have length equal to the total number of cells in the dataset, before any quality control is applied. """ @@ -74,104 +75,104 @@ class AnalyzeOptions: check out the setter methods of this class for more details. Attributes: - per_cell_rna_qc_metrics_options (PerCellRnaQcMetricsOptions): + per_cell_rna_qc_metrics_options: Options to pass to :py:meth:`~scranpy.quality_control.per_cell_rna_qc_metrics.per_cell_rna_qc_metrics`. 
- suggest_rna_qc_filters_options (SuggestRnaQcFiltersOptions): + suggest_rna_qc_filters_options: Options to pass to :py:meth:`~scranpy.quality_control.suggest_rna_qc_filters.suggest_rna_qc_filters`. - create_rna_qc_filter_options (CreateRnaQcFilterOptions): + create_rna_qc_filter_options: Options to pass to :py:meth:`~scranpy.quality_control.create_rna_qc_filter.create_rna_qc_filter`. - per_cell_adt_qc_metrics_options (PerCellRnaQcMetricsOptions): + per_cell_adt_qc_metrics_options: Options to pass to :py:meth:`~scranpy.quality_control.per_cell_adt_metrics.per_cell_adt_qc_metrics`. - suggest_adt_qc_filters_options (SuggestRnaQcFiltersOptions): + suggest_adt_qc_filters_options: Options to pass to :py:meth:`~scranpy.quality_control.suggest_adt_qc_filters.suggest_adt_qc_filters`. - create_adt_qc_filter_options (CreateRnaQcFilterOptions): + create_adt_qc_filter_options: Options to pass to :py:meth:`~scranpy.quality_control.create_adt_qc_filter.create_adt_qc_filter`. - per_cell_crispr_qc_metrics_options (PerCellRnaQcMetricsOptions): + per_cell_crispr_qc_metrics_options: Options to pass to :py:meth:`~scranpy.quality_control.per_cell_crispr_qc_metrics.per_cell_crispr_qc_metrics`. - suggest_crispr_qc_filters_options (SuggestRnaQcFiltersOptions): + suggest_crispr_qc_filters_options: Options to pass to :py:meth:`~scranpy.quality_control.suggest_crispr_qc_filters.suggest_crispr_qc_filters`. - create_crispr_qc_filter_options (CreateRnaQcFilterOptions): + create_crispr_qc_filter_options: Options to pass to :py:meth:`~scranpy.quality_control.create_crispr_qc_filter.create_crispr_qc_filter`. - filter_cells_options (FilterCellsOptions): + filter_cells_options: Options to pass to :py:meth:`~scranpy.quality_control.filter_cells.filter_cells`. - rna_log_norm_counts_options (LogNormCountsOptions): + rna_log_norm_counts_options: Options to pass to :py:meth:`~scranpy.normalization.log_norm_counts.log_norm_counts` for the RNA count matrix. - grouped_size_factors_options (GroupedSizeFactorsOptions): + grouped_size_factors_options: Options to pass to :py:meth:`~scranpy.normalization.grouped_size_factors.grouped_size_factors` to compute ADT size factors. - adt_log_norm_counts_options (LogNormCountsOptions): + adt_log_norm_counts_options: Options to pass to :py:meth:`~scranpy.normalization.log_norm_counts.log_norm_counts` for the ADT count matrix. - crispr_log_norm_counts_options (LogNormCountsOptions): + crispr_log_norm_counts_options: Options to pass to :py:meth:`~scranpy.normalization.log_norm_counts.log_norm_counts` for the CRISPR count matrix. - choose_hvgs_options (ChooseHvgsOptions): + choose_hvgs_options: Options to pass to :py:meth:`~scranpy.feature_selection.choose_hvgs.choose_hvgs` to choose highly variable genes for the RNA data. - model_gene_variances_options (ModelGeneVariancesOptions): + model_gene_variances_options: Options to pass to :py:meth:`~scranpy.feature_selection.model_gene_variances.model_gene_variances` to model per-gene variances for the RNA data. - rna_run_pca_options (RunPcaOptions): + rna_run_pca_options: Options to pass to :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca` for the RNA log-expression matrix. - adt_run_pca_options (RunPcaOptions): + adt_run_pca_options: Options to pass to :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca` for the ADT log-expression matrix. - crispr_run_pca_options (RunPcaOptions): + crispr_run_pca_options: Options to pass to :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca` for the CRISPR log-expression matrix. 
- mnn_correct_options (MnnCorrectOptions): + mnn_correct_options: Options to pass to :py:meth:`~scranpy.batch_correction.mnn_correct.mnn_correct`. - build_neighbor_index_options (BuildNeighborIndexOptions): + build_neighbor_index_options: Options to pass to :py:meth:`~scranpy.nearest_neighbors.build_neighbor_index.build_neighbor_index`. - find_nearest_neighbors_options (FindNearestNeighborsOptions): + find_nearest_neighbors_options: Options to pass to :py:meth:`~scranpy.nearest_neighbors.find_nearest_neighbors.find_nearest_neighbors`. - run_tsne_options (RunTsneOptions): + run_tsne_options: Options to pass to :py:meth:`~scranpy.dimensionality_reduction.run_tsne.run_tsne`. - run_umap_options (RunUmapOptions): + run_umap_options: Options to pass to :py:meth:`~scranpy.dimensionality_reduction.run_umap.run_umap`. - build_snn_graph_options (BuildSnnGraphOptions): + build_snn_graph_options: Options to pass to :py:meth:`~scranpy.clustering.build_snn_graph.build_snn_graph`. - rna_score_markers_options (ScoreMarkersOptions): + rna_score_markers_options: Options to pass to :py:meth:`~scranpy.marker_detection.score_markers.score_markers` for the RNA log-expression values. - adt_score_markers_options (ScoreMarkersOptions): + adt_score_markers_options: Options to pass to :py:meth:`~scranpy.marker_detection.score_markers.score_markers` for the ADT log-abundances. - crispr_score_markers_options (ScoreMarkersOptions): + crispr_score_markers_options: Options to pass to :py:meth:`~scranpy.marker_detection.score_markers.score_markers` for the CRISPR log-abundances. - miscellaneous_options (MiscellaneousOptions): + miscellaneous_options: Further options that are not associated with any single function call. """ @@ -303,7 +304,7 @@ def set_seed(self, seed: int = 42): :py:meth:`~scranpy.dimensionality_reduction.run_umap.run_umap`. Args: - seed (int, optional): + seed: Seed for random number generation. """ self.run_pca_options.set_seed(seed) @@ -328,7 +329,8 @@ def set_threads(self, num_threads: int = 1): In all cases, thread utilization will not exceed the limit specified here in ``num_threads``. Args: - num_threads (int, optional): Number of threads. Defaults to 1. + num_threads: + Number of threads. Defaults to 1. """ self.per_cell_rna_qc_metrics_options.set_threads(num_threads) self.log_norm_counts_options.set_threads(num_threads) diff --git a/src/scranpy/analyze/AnalyzeResults.py b/src/scranpy/analyze/AnalyzeResults.py index 2d39d33..1ed0416 100644 --- a/src/scranpy/analyze/AnalyzeResults.py +++ b/src/scranpy/analyze/AnalyzeResults.py @@ -1,15 +1,15 @@ from dataclasses import dataclass from functools import singledispatchmethod +from typing import Any, Mapping, Optional, Sequence -from typing import Optional, Mapping, Any, Sequence -from singlecellexperiment import SingleCellExperiment from biocframe import BiocFrame -from numpy import ndarray, array, log1p, log -from igraph import Graph from delayedarray import DelayedArray +from igraph import Graph +from numpy import array, log, log1p, ndarray +from singlecellexperiment import SingleCellExperiment -from .. import dimensionality_reduction as dimred from .. import batch_correction as correct +from .. import dimensionality_reduction as dimred @dataclass @@ -17,93 +17,93 @@ class AnalyzeResults: """Results across all analyis steps from :py:meth:`~scranpy.analyze.analyze.analyze`. 
Attributes: - rna_quality_control_metrics (BiocFrame, optional): + rna_quality_control_metrics: Output of :py:meth:`~scranpy.quality_control.per_cell_rna_qc_metrics.per_cell_rna_qc_metrics`. - rna_quality_control_thresholds (BiocFrame, optional): + rna_quality_control_thresholds: Output of :py:meth:`~scranpy.quality_control.suggest_rna_qc_filters.suggest_rna_qc_filters`. - rna_quality_control_filter (ndarray, optional): + rna_quality_control_filter: Output of :py:meth:`~scranpy.quality_control.create_rna_qc_filter.create_rna_qc_filter`. - adt_quality_control_metrics (BiocFrame, optional): + adt_quality_control_metrics: Output of :py:meth:`~scranpy.quality_control.per_cell_adt_qc_metrics.per_cell_adt_qc_metrics`. - adt_quality_control_thresholds (BiocFrame, optional): + adt_quality_control_thresholds: Output of :py:meth:`~scranpy.quality_control.suggest_adt_qc_filters.suggest_adt_qc_filters`. - adt_quality_control_filter (ndarray, optional): + adt_quality_control_filter: Output of :py:meth:`~scranpy.quality_control.create_adt_qc_filter.create_adt_qc_filter`. - crispr_quality_control_metrics (BiocFrame, optional): + crispr_quality_control_metrics: Output of :py:meth:`~scranpy.quality_control.per_cell_crispr_qc_metrics.per_cell_crispr_qc_metrics`. - crispr_quality_control_thresholds (BiocFrame, optional): + crispr_quality_control_thresholds: Output of :py:meth:`~scranpy.quality_control.suggest_crispr_qc_filters.suggest_crispr_qc_filters`. - crispr_quality_control_filter (ndarray, optional): + crispr_quality_control_filter: Output of :py:meth:`~scranpy.quality_control.create_crispr_qc_filter.create_crispr_qc_filter`. - quality_control_retained (ndarray, optional): + quality_control_retained: Array of length equal to the number of cells in the dataset before quality filtering, indicating whether each cell should be retained. - rna_size_factors (ndarray, optional): + rna_size_factors: Array of length equal to the number of cells in the dataset after quality filtering, containing the size factor from the RNA data for each cell. - adt_size_factors (ndarray, optional): + adt_size_factors: Array of length equal to the number of cells in the dataset after quality filtering, containing the size factor from the ADT data for each cell. - crispr_size_factors (ndarray, optional): + crispr_size_factors: Array of length equal to the number of cells in the dataset after quality filtering, containing the size factor from the CRISPR data for each cell. - gene_variances (BiocFrame, optional): + gene_variances: Output of :py:meth:`~scranpy.feature_selection.model_gene_variances.model_gene_variances`. - hvgs (ndarray, optional): + hvgs: Output of :py:meth:`~scranpy.feature_selection.choose_hvgs.choose_hvgs`. - rna_pca (PcaResult, optional): + rna_pca: Output of :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca` on the RNA data. - adt_pca (PcaResult, optional): + adt_pca: Output of :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca` on the ADT data. - crispr_pca (PcaResult, optional): + crispr_pca: Output of :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca` on the CRISPR data. - combined_pcs (PcaResult, optional): + combined_pcs: Output of :py:meth:`~scranpy.dimensionality_reduction.combine_embeddings.combine_embeddings` on the principal components for multiple modalities. - mnn (MnnCorrectResult, optional): + mnn: Output of :py:meth:`~scranpy.batch_correction.mnn_correct.mnn_correct`. 
- tsne (TsneEmbedding, optional): + tsne: Output of :py:meth:`~scranpy.dimensionality_reduction.run_tsne.run_tsne`. - umap (UmapEmbedding, optional): + umap: Output of :py:meth:`~scranpy.dimensionality_reduction.run_umap.run_umap`. - snn_graph (Graph, optional): + snn_graph: Output of :py:meth:`~scranpy.clustering.build_snn_graph.build_snn_graph`. - clusters (List, optional): + clusters: List of length equal to the number of cells in the (filtered) dataset, containing the cluster assignment for each cell. - rna_markers (Mapping, optional): + rna_markers: Output of :py:meth:`~scranpy.marker_detection.score_markers.score_markers` on the RNA data. - adt_markers (Mapping, optional): + adt_markers: Output of :py:meth:`~scranpy.marker_detection.score_markers.score_markers` on the ADT data. - crispr_markers (Mapping, optional): + crispr_markers: Output of :py:meth:`~scranpy.marker_detection.score_markers.score_markers` on the CRISPR data. """ @@ -182,8 +182,8 @@ def __to_sce( assays={"counts": filtered, "logcounts": normalized} ) rna_sce.row_names = rna_features - rna_sce.col_data = self.rna_quality_control_metrics[keep, :] - rna_sce.col_data["size_factors"] = self.rna_size_factors + rna_sce.column_data = self.rna_quality_control_metrics[keep, :] + rna_sce.column_data["size_factors"] = self.rna_size_factors rna_sce.row_data = self.gene_variances rna_sce.reduced_dims = {"pca": self.rna_pca.principal_components} main_sce = rna_sce @@ -197,8 +197,8 @@ def __to_sce( assays={"counts": filtered, "logcounts": normalized} ) adt_sce.row_names = adt_features - adt_sce.col_data = self.rna_quality_control_metrics[keep, :] - adt_sce.col_data["size_factors"] = self.adt_size_factors + adt_sce.column_data = self.adt_quality_control_metrics[keep, :] + adt_sce.column_data["size_factors"] = self.adt_size_factors adt_sce.reduced_dims = {"pca": self.adt_pca.principal_components} if main_sce is None: main_sce = adt_sce @@ -216,8 +216,8 @@ def __to_sce( assays={"counts": filtered, "logcounts": normalized} ) crispr_sce.row_names = crispr_features - crispr_sce.col_data = self.rna_quality_control_metrics[keep, :] - crispr_sce.col_data["size_factors"] = self.crispr_size_factors + crispr_sce.column_data = self.crispr_quality_control_metrics[keep, :] + crispr_sce.column_data["size_factors"] = self.crispr_size_factors crispr_sce.reduced_dims = {"pca": self.crispr_pca.principal_components} if main_sce is None: main_sce = crispr_sce @@ -252,12 +252,15 @@ def to_sce( """Save results as a :py:class:`singlecellexperiment.SingleCellExperiment`. Args: - x: Input object. usually a matrix of raw counts. - assay (str, optional): assay name for the matrix. + x: + Input object. usually a matrix of raw counts. + + assay: + Assay name for the matrix. Defaults to "counts". Returns: - SingleCellExperiment: An SCE with the results. + An SCE with the results. 
""" return self.__to_sce( rna_matrix=rna_matrix, @@ -275,7 +278,7 @@ def _( assay: str = "counts", include_gene_data: bool = False, ) -> SingleCellExperiment: - if assay not in x.assayNames: + if assay not in x.assay_names: raise ValueError(f"SCE does not contain a '{assay}' matrix.") mat = x.assay(assay) diff --git a/src/scranpy/analyze/analyze.py b/src/scranpy/analyze/analyze.py index dc7d32f..050f628 100644 --- a/src/scranpy/analyze/analyze.py +++ b/src/scranpy/analyze/analyze.py @@ -1,21 +1,19 @@ -from typing import Sequence, Union, Optional +from typing import Optional, Union + +from singlecellexperiment import SingleCellExperiment +from summarizedexperiment import SummarizedExperiment from .AnalyzeOptions import AnalyzeOptions from .AnalyzeResults import AnalyzeResults -from .live_analyze import live_analyze from .dry_analyze import dry_analyze +from .live_analyze import live_analyze from .update import update -from summarizedexperiment import SummarizedExperiment -from singlecellexperiment import SingleCellExperiment -from biocframe import BiocFrame -from .._utils import MatrixTypes - def analyze( rna_matrix, - adt_matrix = None, - crispr_matrix = None, + adt_matrix=None, + crispr_matrix=None, options: AnalyzeOptions = AnalyzeOptions(), dry_run: bool = False, ) -> Union[AnalyzeResults, str]: @@ -33,21 +31,27 @@ def analyze( - Marker detection for each cluster Arguments: - rna_matrix (optional): Count matrix for RNA data. + rna_matrix: + Count matrix for RNA data. Alternatively None if no RNA data is available. - adt_matrix (optional): Count matrix for the ADT data. + adt_matrix: + Count matrix for the ADT data. Alternatively None if no ADT data is available. - crispr_matrix (optional): Count matrix for the CRISPR data. + crispr_matrix: + Count matrix for the CRISPR data. Alternatively None if no CRISPR data is available. - options (AnalyzeOptions): Optional analysis parameters. + options: + Optional analysis parameters. - dry_run (bool): Whether to perform a dry run. + dry_run: + Whether to perform a dry run. Raises: - NotImplementedError: If ``matrix`` is not an expected type. + NotImplementedError: + If ``matrix`` is not an expected type. Returns: If ``dry_run = False``, a :py:class:`~scranpy.analyze.AnalyzeResults.AnalyzeResults` object is returned @@ -83,24 +87,30 @@ def analyze_se( :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment` inputs. Arguments: - rna_se (SummarizedExperiment, optional): SummarizedExperiment containing RNA data. + rna_se: + SummarizedExperiment containing RNA data. Alternatively None if no RNA data is available. - adt_se (SummarizedExperiment, optional): SummarizedExperiment containing ADT data. + adt_se: + SummarizedExperiment containing ADT data. Alternatively None if no ADT data is available. - crispr_se (SummarizedExperiment, optional): SummarizedExperiment containing CRISPR data. + crispr_se: + SummarizedExperiment containing CRISPR data. Alternatively None if no CRISPR data is available. - assay_type (Union[str, int]): + assay_type: Assay containing the count data in each SummarizedExperiment. - options (AnalyzeOptions): Optional analysis parameters. + options: + Optional analysis parameters. - dry_run (bool): Whether to perform a dry run. + dry_run: + Whether to perform a dry run. Raises: - NotImplementedError: If ``matrix`` is not an expected type. + NotImplementedError: + If ``matrix`` is not an expected type. 
Returns: If ``dry_run = False``, a :py:class:`~scranpy.analyze.AnalyzeResults.AnalyzeResults` object is returned @@ -108,6 +118,7 @@ def analyze_se( If ``dry_run = True``, a string is returned containing all the steps required to perform the analysis. """ + def exfil(se): if se is not None: return se.assay(assay_type), se.row_names @@ -120,18 +131,18 @@ def exfil(se): return analyze( rna_matrix, - adt_matrix = adt_matrix, - crispr_matrix = crispr_matrix, - options = update( + adt_matrix=adt_matrix, + crispr_matrix=crispr_matrix, + options=update( options, - miscellaneous_options = update( + miscellaneous_options=update( options.miscellaneous_options, - rna_feature_names = rna_features, - adt_feature_names = adt_features, - crispr_feature_names = crispr_features, + rna_feature_names=rna_features, + adt_feature_names=adt_features, + crispr_feature_names=crispr_features, ), ), - dry_run = dry_run, + dry_run=dry_run, ) @@ -148,33 +159,37 @@ def analyze_sce( :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment` inputs. Arguments: - sce (SingleCellExperiment): A :py:class:`singlecellexperiment.SingleCellExperiment` object, + sce: + A :py:class:`singlecellexperiment.SingleCellExperiment` object, possibly with data from other modalities in its alternative experiments. - rna_exp (Union[str, int], optional): + rna_exp: String or index specifying the alternative experiment containing the RNA data. An empty string is assumed to refer to the main experiment. If None, we assume that no RNA data is available. - adt_exp (Union[str, int], optional): + adt_exp: String or index specifying the alternative experiment containing the ADT data. An empty string is assumed to refer to the main experiment. If None, we assume that no RNA data is available. - crispr_exp (Union[str, int], optional): + crispr_exp: String or index specifying the alternative experiment containing the CRISPR data. An empty string is assumed to refer to the main experiment. If None, we assume that no RNA data is available. - assay_type (Union[str, int]): + assay_type: Assay containing the count data in each SummarizedExperiment. - options (AnalyzeOptions): Optional analysis parameters. + options: + Optional analysis parameters. - dry_run (bool): Whether to perform a dry run. + dry_run: + Whether to perform a dry run. Raises: - ValueError: If SCE does not contain a ``assay`` matrix. + ValueError: + If SCE does not contain a ``assay`` matrix. Returns: If ``dry_run = False``, a :py:class:`~scranpy.analyze.AnalyzeResults.AnalyzeResults` object is returned @@ -182,6 +197,7 @@ def analyze_sce( If ``dry_run = True``, a string is returned containing all the steps required to perform the analysis. 
""" + def exfil(sce, exp): if exp is None: return None @@ -191,10 +207,10 @@ def exfil(sce, exp): return sce.alternative_experiments[exp] return analyze_se( - rna_se = exfil(sce, rna_exp), - adt_se = exfil(sce, adt_exp), - crispr_se = exfil(sce, crispr_exp), - assay_type = assay_type, - options = options, - dry_run = dry_run, + rna_se=exfil(sce, rna_exp), + adt_se=exfil(sce, adt_exp), + crispr_se=exfil(sce, crispr_exp), + assay_type=assay_type, + options=options, + dry_run=dry_run, ) diff --git a/src/scranpy/analyze/run_neighbor_suite.py b/src/scranpy/analyze/run_neighbor_suite.py index 0d7f56c..b7c1040 100644 --- a/src/scranpy/analyze/run_neighbor_suite.py +++ b/src/scranpy/analyze/run_neighbor_suite.py @@ -1,11 +1,12 @@ -from typing import Tuple, Callable -from igraph import Graph from concurrent.futures import ProcessPoolExecutor, wait from copy import copy +from typing import Callable, Tuple + +from igraph import Graph -from .. import nearest_neighbors as nn -from .. import dimensionality_reduction as dimred from .. import clustering as clust +from .. import dimensionality_reduction as dimred +from .. import nearest_neighbors as nn def _unserialize_neighbors_before_run(f, serialized, opt): @@ -27,26 +28,28 @@ def run_neighbor_suite( other. Args: - principal_components (ndarray): + principal_components: Matrix of principal components where rows are cells and columns are PCs. Thi is usually produced by :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca`. - build_neighbor_index_options (BuildNeighborIndexOptions, optional): Optional arguments to pass to + build_neighbor_index_options: + Optional arguments to pass to :py:meth:`~scranpy.nearest_neighbors.build_neighbor_index.build_neighbor_index`. - find_nearest_neighbors_options (FindNearestNeighborsOptions, optional): Optional arguments to pass to + find_nearest_neighbors_options: + Optional arguments to pass to :py:meth:`~scranpy.nearest_neighbors.find_nearest_neighbors.find_nearest_neighbors`. - run_umap_options (RunUmapOptions, optional): + run_umap_options: Optional arguments to pass to :py:meth:`~scranpy.dimensionality_reduction.run_umap.run_umap`. - run_tsne_options (RunTsneOptions, optional): + run_tsne_options: Optional arguments to pass to :py:meth:`~scranpy.dimensionality_reduction.run_tsne.run_tsne`. - build_snn_graph_options (BuildSnnGraphOptions, optional): + build_snn_graph_options: Optional arguments to pass to :py:meth:`~scranpy.clustering.build_snn_graph.build_snn_graph`. - num_threads (int, optional): + num_threads: Number of threads to use for the parallel execution of UMAP, t-SNE and SNN graph construction. This overrides the specified number of threads in ``run_umap``, ``run_tsne`` and ``build_snn_graph``. diff --git a/src/scranpy/analyze/update.py b/src/scranpy/analyze/update.py index 3c20dbe..b88b2bd 100644 --- a/src/scranpy/analyze/update.py +++ b/src/scranpy/analyze/update.py @@ -9,9 +9,11 @@ def update(options, **kwargs): """Convenience function to update the settings on an ``*Options`` object. Args: - options: Any of the ``*Options`` object> + options: + Any of the ``*Options`` object. - kwargs: Key-value pairs of settings to replace. + kwargs: + Key-value pairs of settings to replace. Results: A copy of ``options`` with replaced settings. 
diff --git a/src/scranpy/batch_correction/mnn_correct.py b/src/scranpy/batch_correction/mnn_correct.py index 3771d18..6306d67 100644 --- a/src/scranpy/batch_correction/mnn_correct.py +++ b/src/scranpy/batch_correction/mnn_correct.py @@ -1,9 +1,10 @@ -from numpy import ndarray, float64, int32 -from typing import Sequence, Optional from dataclasses import dataclass +from typing import Optional, Sequence + +from numpy import float64, int32, ndarray -from .._utils import factorize from .. import _cpphelpers as lib +from .._utils import factorize @dataclass @@ -11,35 +12,42 @@ class MnnCorrectOptions: """Options to pass to :py:meth:`~scranpy.batch_correction.mnn_correct.mnn_correct`. Attributes: - k: Number of neighbors for detecting mutual nearest neighbors. + k: + Number of neighbors for detecting mutual nearest neighbors. - approximate: Whether to perform an approximate nearest neighbor search. + approximate: + Whether to perform an approximate nearest neighbor search. - order: Ordering of batches to correct. The first + order: + Ordering of batches to correct. The first entry is used as the initial reference, and all subsequent batches are merged and added to the reference in the specified order. This should contain all unique levels in the ``batch`` argument supplied to :py:meth:`~scranpy.batch_correction.mnn_correct.mnn_correct`. If None, an appropriate ordering is automatically determined. - reference_policy: Policy to use for choosing the initial reference + reference_policy: + Policy to use for choosing the initial reference batch. This can be one of "max-rss" (maximum residual sum of squares within the batch, which is the default), "max-variance" (maximum variance within the batch), "max-size" (maximum number of cells), or "input" (using the supplied order of levels in ``batch``). Only used if ``order`` is not supplied. - num_mads: Number of median absolute deviations, used to define + num_mads: + Number of median absolute deviations, used to define the threshold for outliers when computing the center of mass for each cell involved in a MNN pair. Larger values reduce kissing but may incorporate inappropriately distant subpopulations in a cell's center of mass. - mass_cap: Cap on the number of observations used to compute the + mass_cap: + Cap on the number of observations used to compute the center of mass for each MNN-involved observation. The dataset is effectively downsampled to `c` observations for this calculation, which improves speed at the cost of some precision. - num_threads: Number of threads to use for the various MNN calculations. + num_threads: + Number of threads to use for the various MNN calculations. """ k: int = 15 @@ -84,13 +92,16 @@ def mnn_correct( """Identify mutual nearest neighbors (MNNs) to correct batch effects in a low-dimensional embedding. Args: - x: Numeric matrix where rows are cells and columns are dimensions, + x: + Numeric matrix where rows are cells and columns are dimensions, typically generated from :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca`. - batch: Sequence of length equal to the number of cells (i.e., rows of ``x``), + batch: + Sequence of length equal to the number of cells (i.e., rows of ``x``), specifying the batch for each cell. - options: Optional parameters. + options: + Optional parameters. 
Returns: The corrected coordinates for each cell, along with some diagnostics diff --git a/src/scranpy/clustering/build_snn_graph.py b/src/scranpy/clustering/build_snn_graph.py index 0029e91..db8ab2c 100644 --- a/src/scranpy/clustering/build_snn_graph.py +++ b/src/scranpy/clustering/build_snn_graph.py @@ -2,7 +2,7 @@ from typing import Literal, Union from igraph import Graph -from numpy import ctypeslib, ndarray, copy +from numpy import copy, ctypeslib, ndarray from .. import _cpphelpers as lib from ..nearest_neighbors import ( @@ -22,7 +22,8 @@ class BuildSnnGraphOptions: """Optional arguments for :py:meth:`~scranpy.clustering.build_snn_graph.build_snn_graph`. Attributes: - num_neighbors: Number of neighbors to use. + num_neighbors: + Number of neighbors to use. Larger values result in a more interconnected graph and generally broader clusters from community detection. Ignored if ``input`` is a :py:class:`~scranpy.nearest_neighbors.find_nearest_neighbors.NeighborResults` object. Defaults to 15. @@ -44,7 +45,8 @@ class BuildSnnGraphOptions: Defaults to 1. Raises: - ValueError: If ``weight_scheme`` is not an expected value. + ValueError: + If ``weight_scheme`` is not an expected value. """ num_neighbors: int = 15 @@ -90,10 +92,12 @@ def build_snn_graph( (:py:class:`~scranpy.nearest_neighbors.find_nearest_neighbors.NeighborResults`). for all cells in the dataset. - options: Optional parameters. + options: + Optional parameters. Raises: - TypeError: If ``input`` is not a nearest neighbor search index or search result + TypeError: + If ``input`` is not a nearest neighbor search index or search result (:py:class:`~scranpy.nearest_neighbors.build_neighbor_index.NeighborIndex`, :py:class:`~scranpy.nearest_neighbors.find_nearest_neighbors.NeighborResults`). diff --git a/src/scranpy/dimensionality_reduction/combine_embeddings.py b/src/scranpy/dimensionality_reduction/combine_embeddings.py index 89d375f..6cc4b0e 100644 --- a/src/scranpy/dimensionality_reduction/combine_embeddings.py +++ b/src/scranpy/dimensionality_reduction/combine_embeddings.py @@ -1,11 +1,12 @@ -from numpy import ndarray, float64, int32, uintp, ones, array from dataclasses import dataclass from typing import Optional +from numpy import array, float64, int32, ndarray, ones, uintp + from .. import _cpphelpers as lib from ..nearest_neighbors import ( - build_neighbor_index, BuildNeighborIndexOptions, + build_neighbor_index, ) @@ -14,15 +15,19 @@ class CombineEmbeddingsOptions: """Options for :py:meth:`~scranpy.dimensionality_reduction.combine_embeddings.combine_embeddings`. Attributes: - neighbors: Number of neighbors to use for approximating the relative variance. + neighbors: + Number of neighbors to use for approximating the relative variance. - approximate: Whether to perform an approximate neighbor search. + approximate: + Whether to perform an approximate neighbor search. - weights: Weights to apply to each entry of ``embeddings``. If None, + weights: + Weights to apply to each entry of ``embeddings``. If None, all embeddings recieve equal weight. If any weight is zero, the corresponding embedding is omitted from the return value. - num_threads: Number of threads to use for the neighbor search. + num_threads: + Number of threads to use for the neighbor search. """ neighbors: int = 20 @@ -46,7 +51,7 @@ def combine_embeddings( All embeddings should have the same number of rows. options: - Further options. + Optional parameters. 
Returns: Array containing the combined embedding, where rows are cells and diff --git a/src/scranpy/dimensionality_reduction/run_pca.py b/src/scranpy/dimensionality_reduction/run_pca.py index c6a71b8..f1c18d0 100644 --- a/src/scranpy/dimensionality_reduction/run_pca.py +++ b/src/scranpy/dimensionality_reduction/run_pca.py @@ -3,10 +3,10 @@ from dataclasses import dataclass from typing import Literal, Optional, Sequence, Union -from numpy import ctypeslib, ndarray, copy +from numpy import copy, ctypeslib, ndarray from .. import _cpphelpers as lib -from .._utils import to_logical, tatamize_input, factorize, MatrixTypes +from .._utils import MatrixTypes, factorize, tatamize_input, to_logical __author__ = "ltla, jkanche" __copyright__ = "ltla, jkanche" @@ -15,9 +15,12 @@ PcaResult = namedtuple("PcaResult", ["principal_components", "variance_explained"]) PcaResult.__doc__ = """Named tuple of results from :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca`. -principal_components: Matrix of principal component (PC) coordinates, +principal_components: + Matrix of principal component (PC) coordinates, where the rows are cells and columns are PCs. -variance_explained: Array of length equal to the number of PCs, + +variance_explained: + Array of length equal to the number of PCs, containing the percentage of variance explained by each PC. """ @@ -46,12 +49,14 @@ class RunPcaOptions: """Optional arguments for :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca`. Attributes: - rank: Number of top PCs to compute. + rank: + Number of top PCs to compute. Larger values capture more biological structure at the cost of increasing computational work and absorbing more random noise. Defaults to 25. - subset: Array specifying which features should be + subset: + Array specifying which features should be used in the PCA (e.g., highly variable genes from :py:meth:`~scranpy.feature_selection.choose_hvgs.choose_hvgs`). This may contain integer indices or booleans. @@ -94,14 +99,16 @@ class RunPcaOptions: Whether to weight each block so that it contributes the same number of effective observations to the covariance matrix. Defaults to True. - num_threads: Number of threads to use. Defaults to 1. + num_threads: + Number of threads to use. Defaults to 1. assay_type: Assay to use from ``input`` if it is a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`. Raises: - ValueError: If ``block_method`` is not an expected value. + ValueError: + If ``block_method`` is not an expected value. """ rank: int = 25 @@ -127,7 +134,8 @@ def run_pca(input: MatrixTypes, options: RunPcaOptions = RunPcaOptions()) -> Pca is the major source of variation in the dataset. Args: - input: Matrix-like object where rows are features and columns are cells, typically containing + input: + Matrix-like object where rows are features and columns are cells, typically containing log-normalized values. This should be a matrix class that can be converted into a :py:class:`~mattress.TatamiNumericPointer`. @@ -136,11 +144,15 @@ def run_pca(input: MatrixTypes, options: RunPcaOptions = RunPcaOptions()) -> Pca Developers may also provide the :py:class:`~mattress.TatamiNumericPointer` itself. - options: Optional parameters. + options: + Optional parameters. Raises: - TypeError: If ``input`` is not an expected type. - ValueError: if ``options.block`` does not match the number of cells. + TypeError: + If ``input`` is not an expected type. + + ValueError: + If ``options.block`` does not match the number of cells. 
Returns: Object containing the PC coordinates and the variance explained by each diff --git a/src/scranpy/dimensionality_reduction/run_tsne.py b/src/scranpy/dimensionality_reduction/run_tsne.py index 1536262..52ebeaf 100644 --- a/src/scranpy/dimensionality_reduction/run_tsne.py +++ b/src/scranpy/dimensionality_reduction/run_tsne.py @@ -1,9 +1,9 @@ import ctypes as ct -from typing import Union from collections import namedtuple from dataclasses import dataclass, field +from typing import Union -from numpy import float64, ndarray, copy +from numpy import copy, float64, ndarray from .. import _cpphelpers as lib from ..nearest_neighbors import ( @@ -21,9 +21,12 @@ TsneEmbedding = namedtuple("TsneEmbedding", ["x", "y"]) TsneEmbedding.__doc__ = """Named tuple of t-SNE coordinates. -x: a NumPy view of length equal to the number of cells, +x: + A NumPy view of length equal to the number of cells, containing the coordinate on the first dimension for each cell. -y: a NumPy view of length equal to the number of cells, + +y: + A NumPy view of length equal to the number of cells, containing the coordinate on the second dimension for each cell. """ @@ -46,7 +49,7 @@ def num_cells(self) -> int: """Get the number of cells in the dataset. Returns: - int: Number of cells. + Number of cells. """ return lib.fetch_tsne_status_nobs(self.__ptr) @@ -54,7 +57,7 @@ def iteration(self) -> int: """Get the current iteration number. Returns: - int: The current iteration number. + The current iteration number. """ return lib.fetch_tsne_status_iteration(self.__ptr) @@ -62,7 +65,7 @@ def clone(self) -> "TsneStatus": """Create a deep copy of the current state. Returns: - TsneStatus: Copy of the current state. + Copy of the current state. """ cloned = copy(self.coordinates) return TsneStatus(lib.clone_tsne_status(self.__ptr), cloned) @@ -74,7 +77,8 @@ def run(self, iteration: int): """Run the t-SNE algorithm up to the specified number of iterations. Args: - iteration: Number of iterations to run to. + iteration: + Number of iterations to run to. This should be greater than the current iteration number in :func:`~scranpy.dimensionality_reduction.run_tsne.TsneStatus.iteration`. """ @@ -84,7 +88,7 @@ def extract(self) -> TsneEmbedding: """Extract the t-SNE coordinates for each cell at the current iteration. Returns: - TsneEmbedding: 'x' and 'y' t-SNE coordinates for all cells. + 'x' and 'y' t-SNE coordinates for all cells. """ return TsneEmbedding(self.coordinates[:, 0], self.coordinates[:, 1]) @@ -95,10 +99,11 @@ def tsne_perplexity_to_neighbors(perplexity: float) -> int: nearest neighbor results to t-SNE functions. Args: - perplexity: Perplexity to use in the t-SNE algorithm. + perplexity: + Perplexity to use in the t-SNE algorithm. Returns: - int: Number of neighbors to search for. + Number of neighbors to search for. """ return lib.perplexity_to_k(perplexity) @@ -113,10 +118,12 @@ class InitializeTsneOptions: Larger values cause the embedding to focus more on broad structure instead of local structure. Defaults to 30. - num_threads: Number of threads to use for the + num_threads: + Number of threads to use for the neighbor search and t-SNE iterations. Defaults to 1. - seed: Seed to use for random initialization of + seed: + Seed to use for random initialization of the t-SNE coordinates. Defaults to 42. """ @@ -155,10 +162,12 @@ def initialize_tsne( in :py:class:`~scranpy.dimensionality_reduction.run_tsne.InitializeTsneOptions` (see also :py:meth:`~scranpy.dimensionality_reduction.run_tsne.tsne_perplexity_to_neighbors`). 
- options: Optional parameters. + options: + Optional parameters. Raises: - TypeError: If ``input`` is not an expected type. + TypeError: + If ``input`` is not an expected type. Returns: A t-SNE status object for further iterations. @@ -233,7 +242,8 @@ def run_tsne( in :py:class:`~scranpy.dimensionality_reduction.run_tsne.InitializeTsneOptions` (see also :py:meth:`~scranpy.dimensionality_reduction.run_tsne.tsne_perplexity_to_neighbors`). - options: Optional parameters. + options: + Optional parameters. Returns: Result containing first two dimensions. diff --git a/src/scranpy/dimensionality_reduction/run_umap.py b/src/scranpy/dimensionality_reduction/run_umap.py index 04ad70b..b5eb757 100644 --- a/src/scranpy/dimensionality_reduction/run_umap.py +++ b/src/scranpy/dimensionality_reduction/run_umap.py @@ -1,9 +1,9 @@ import ctypes as ct from collections import namedtuple from dataclasses import dataclass, field -from typing import Union, Optional +from typing import Optional, Union -from numpy import float64, ndarray, copy +from numpy import copy, float64, ndarray from .. import _cpphelpers as lib from ..nearest_neighbors import ( @@ -21,9 +21,12 @@ UmapEmbedding = namedtuple("UmapEmbedding", ["x", "y"]) UmapEmbedding.__doc__ = """Named tuple of UMAP coordinates. -x: a NumPy view of length equal to the number of cells, +x: + A NumPy view of length equal to the number of cells, containing the coordinate on the first dimension for each cell. -y: a NumPy view of length equal to the number of cells, + +y: + A NumPy view of length equal to the number of cells, containing the coordinate on the second dimension for each cell. """ @@ -46,7 +49,7 @@ def num_cells(self) -> int: """Get the number of cells in the dataset. Returns: - int: Number of cells. + Number of cells. """ return lib.fetch_umap_status_nobs(self.__ptr) @@ -54,7 +57,7 @@ def epoch(self) -> int: """Get the current epoch of the UMAP state. Returns: - int: The current epoch. + The current epoch. """ return lib.fetch_umap_status_epoch(self.__ptr) @@ -62,7 +65,7 @@ def num_epochs(self) -> int: """Get the total number of epochs for this UMAP run. Returns: - int: Number of epochs. + Number of epochs. """ return lib.fetch_umap_status_num_epochs(self.__ptr) @@ -70,7 +73,7 @@ def clone(self) -> "UmapStatus": """Create a deep copy of the current state. Returns: - UmapStatus: Copy of the current state. + Copy of the current state. """ cloned = copy(self.coordinates) return UmapStatus(lib.clone_umap_status(self.__ptr, cloned), cloned) @@ -82,7 +85,8 @@ def run(self, epoch_limit: Optional[int] = None): """Run the UMAP algorithm to the specified epoch limit. Args: - epoch_limit: Number of epochs to run up to. + epoch_limit: + Number of epochs to run up to. This should be greater than the current epoch in :func:`~scranpy.dimensionality_reduction.run_umap.UmapStatus.epoch`. """ @@ -94,7 +98,7 @@ def extract(self) -> UmapEmbedding: """Extract the UMAP coordinates for each cell at the current epoch. Returns: - UmapEmbedding: x and y UMAP coordinates for all cells. + `x` and `y` UMAP coordinates for all cells. """ return UmapEmbedding(self.coordinates[:, 0], self.coordinates[:, 1]) @@ -172,10 +176,12 @@ def initialize_umap( (:py:class:`~scranpy.nearest_neighbors.find_nearest_neighbors.NeighborResults`). for all cells in the dataset. - options: Optional parameters. + options: + Optional parameters. Raises: - TypeError: If ``input`` is not an expected type. + TypeError: + If ``input`` is not an expected type. 
Returns: A UMAP status object for iteration through the epochs. @@ -241,7 +247,8 @@ def run_umap( (:py:class:`~scranpy.nearest_neighbors.find_nearest_neighbors.NeighborResults`). for all cells in the dataset. - options: Optional parameters. + options: + Optional parameters. Returns: Result containing the first two dimensions. diff --git a/src/scranpy/feature_selection/choose_hvgs.py b/src/scranpy/feature_selection/choose_hvgs.py index f7f6b3b..0e3a8ce 100644 --- a/src/scranpy/feature_selection/choose_hvgs.py +++ b/src/scranpy/feature_selection/choose_hvgs.py @@ -18,6 +18,7 @@ class ChooseHvgsOptions: Number of HVGs to retain. Larger values preserve more biological structure at the cost of increasing computational work and random noise from less-variable genes. + Defaults to 2500. """ @@ -32,13 +33,15 @@ def choose_hvgs( biology, under the assumption that biological variation is larger than random noise. Args: - stat: Array of variance modelling statistics, + stat: + Array of variance modelling statistics, where larger values correspond to higher variability. This usually contains the residuals of the fitted mean-variance trend from :py:meth:`~scranpy.feature_selection.model_gene_variances.model_gene_variances`. - options: Optional parameters. + options: + Optional parameters. Return: Array of booleans of length equal to ``stat``, specifying whether a diff --git a/src/scranpy/feature_selection/model_gene_variances.py b/src/scranpy/feature_selection/model_gene_variances.py index 655f1b8..bc7d5b9 100644 --- a/src/scranpy/feature_selection/model_gene_variances.py +++ b/src/scranpy/feature_selection/model_gene_variances.py @@ -5,7 +5,7 @@ from numpy import float64, ndarray, uintp from .. import _cpphelpers as lib -from .._utils import factorize, tatamize_input, MatrixTypes +from .._utils import MatrixTypes, factorize, tatamize_input __author__ = "ltla, jkanche" __copyright__ = "ltla, jkanche" @@ -17,13 +17,15 @@ class ModelGeneVariancesOptions: """Optional arguments for :py:meth:`~scranpy.feature_selection.model_gene_variances.model_gene_variances`. Attributes: - block: Block assignment for each cell. + block: + Block assignment for each cell. Variance modelling is performed within each block to avoid interference from inter-block differences. If provided, this should have length equal to the number of cells, where cells have the same value if and only if they are in the same block. Defaults to None, indicating all cells are part of the same block. - span: Span to use for the LOWESS trend fitting. + span: + Span to use for the LOWESS trend fitting. Larger values yield a smoother curve and reduces the risk of overfitting, at the cost of being less responsive to local variations. Defaults to 0.3. @@ -36,7 +38,8 @@ class ModelGeneVariancesOptions: Sequence of feature names of length equal to the number of rows in ``input``. If provided, this is used as the row names of the output data frames. - num_threads: Number of threads to use. Defaults to 1. + num_threads: + Number of threads to use. Defaults to 1. """ block: Optional[Sequence] = None @@ -55,7 +58,7 @@ def model_gene_variances( :py:meth:`~scranpy.feature_selection.choose_hvgs.choose_hvgs`. Args: - input: + input: Matrix-like object where rows are features and columns are cells, typically containing log-normalized expression values from :py:meth:`~scranpy.normalization.log_norm_counts.log_norm_counts`. @@ -67,7 +70,8 @@ def model_gene_variances( Developers may also provide a :py:class:`~mattress.TatamiNumericPointer` directly. 
- options: Optional parameters. + options: + Optional parameters. Returns: Data frame with variance modelling results for each gene, specifically diff --git a/src/scranpy/feature_set_enrichment/hypergeometric_test.py b/src/scranpy/feature_set_enrichment/hypergeometric_test.py index 27d3f52..1603cf2 100644 --- a/src/scranpy/feature_set_enrichment/hypergeometric_test.py +++ b/src/scranpy/feature_set_enrichment/hypergeometric_test.py @@ -1,6 +1,7 @@ from dataclasses import dataclass from typing import Sequence, Union -from numpy import array, ndarray, int32, float64 + +from numpy import array, float64, int32, ndarray from .. import _cpphelpers as lib @@ -10,12 +11,15 @@ class HypergeometricTestOptions: """Options for :py:meth:`~scranpy.feature_set_enrichment.hypergeometric_tail.hypergeometric_tail`. Attributes: - log: Whether to report log-transformed p-values. + log: + Whether to report log-transformed p-values. - upper_tail: Whether to compute the upper tail of the hypergeometric distribution, + upper_tail: + Whether to compute the upper tail of the hypergeometric distribution, i.e., test for overrepresentation. - num_threads: Number of threads to use. + num_threads: + Number of threads to use. """ log: bool = False diff --git a/src/scranpy/feature_set_enrichment/score_feature_set.py b/src/scranpy/feature_set_enrichment/score_feature_set.py index 3dd73bd..b6f72b2 100644 --- a/src/scranpy/feature_set_enrichment/score_feature_set.py +++ b/src/scranpy/feature_set_enrichment/score_feature_set.py @@ -1,9 +1,10 @@ from dataclasses import dataclass -from typing import Sequence, Optional, Tuple, Union -from numpy import ndarray, float64 +from typing import Optional, Sequence, Tuple, Union + +from numpy import float64, ndarray from .. import _cpphelpers as lib -from .._utils import factorize, to_logical, tatamize_input, MatrixTypes +from .._utils import MatrixTypes, factorize, tatamize_input, to_logical @dataclass @@ -20,14 +21,16 @@ class ScoreFeatureSetOptions: cells have the same value if and only if they are in the same block. Defaults to None, indicating all cells are part of the same block. - scale: Whether to scale the features to unit variance before + scale: + Whether to scale the features to unit variance before computing the scores. assay_type: Assay to use from ``input`` if it is a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`. - num_threads: Number of threads to use. + num_threads: + Number of threads to use. """ block: Optional[Sequence] = None @@ -42,8 +45,8 @@ def score_feature_set( options=ScoreFeatureSetOptions(), ) -> Tuple[ndarray, ndarray]: """Compute a score for the activity of a feature set in each cell. This is - done using a slightly modified version of the - `GSDecon `_ algorithm, + done using a slightly modified version of the + `GSDecon `_ algorithm, where we perform a PCA to obtain the rank-1 reconstruction of the feature set's expression values across all cells; the mean of the reconstructed values serves as the score per cell, while the rotation vector is reported @@ -64,7 +67,7 @@ def score_feature_set( specifying that the corresponding row belongs to the subset. options: - Further options. + Optional parameters. 
Returns: Tuple where the first array is of length equal to the diff --git a/src/scranpy/marker_detection/score_markers.py b/src/scranpy/marker_detection/score_markers.py index bea0819..c54f018 100644 --- a/src/scranpy/marker_detection/score_markers.py +++ b/src/scranpy/marker_detection/score_markers.py @@ -1,12 +1,12 @@ -from dataclasses import dataclass from collections import namedtuple -from typing import Mapping, Optional, Sequence, Union, Any +from dataclasses import dataclass +from typing import Any, Mapping, Optional, Sequence, Union from biocframe import BiocFrame -from numpy import ndarray, uintp, float64 +from numpy import float64, ndarray, uintp from .. import _cpphelpers as lib -from .._utils import process_block, factorize, tatamize_input, MatrixTypes +from .._utils import MatrixTypes, factorize, process_block, tatamize_input __author__ = "ltla, jkanche" __copyright__ = "ltla, jkanche" @@ -82,7 +82,8 @@ class ScoreMarkersOptions: Sequence of feature names of length equal to the number of rows in ``input``. If provided, this is used as the row names of the output data frames. - num_threads: Number of threads to use. Defaults to 1. + num_threads: + Number of threads to use. Defaults to 1. """ block: Optional[Sequence] = None @@ -104,7 +105,8 @@ def score_markers( :py:meth:`~scranpy.clustering.build_snn_graph.build_snn_graph`. Args: - input: Matrix-like object where rows are features and columns are cells, typically containing + input: + Matrix-like object where rows are features and columns are cells, typically containing expression values of some kind. This should be a matrix class that can be converted into a :py:class:`~mattress.TatamiNumericPointer.TatamiNumericPointer`. @@ -118,14 +120,16 @@ def score_markers( This should have length equal to the number of cells, where the entry for each cell specifies the assigned group for that cell. - options: Optional parameters. + options: + Optional parameters. Raises: - ValueError: If ``input`` is not an expected type. + ValueError: + If ``input`` is not an expected type. Returns: Dictionary where the keys are the group identifiers (as defined in ``grouping``) - and the values are :py:class:`~biocframe.BiocFrame.BiocFrame` objects containing + and the values are :py:class:`~biocframe.BiocFrame.BiocFrame` objects containing computed metrics for each group. """ x = tatamize_input(input, options.assay_type) diff --git a/src/scranpy/nearest_neighbors/build_neighbor_index.py b/src/scranpy/nearest_neighbors/build_neighbor_index.py index c6ae6a6..a5f64b5 100644 --- a/src/scranpy/nearest_neighbors/build_neighbor_index.py +++ b/src/scranpy/nearest_neighbors/build_neighbor_index.py @@ -51,8 +51,10 @@ class BuildNeighborIndexOptions: """Optional arguments for :py:meth:`~scranpy.nearest_neighbors.build_neighbor_index.build_neighbor_index`. Attributes: - approximate: Whether to build an index for an approximate + approximate: + Whether to build an index for an approximate neighbor search. This sacrifices some accuracy for speed. + Defaults to True. """ @@ -66,13 +68,16 @@ def build_neighbor_index( :py:meth:`~scranpy.nearest_neighbors.find_nearest_neighbors.find_nearest_neighbors`. Args: - input: A matrix where rows are cells and dimensions are columns. + input: + A matrix where rows are cells and dimensions are columns. This is usually the principal components matrix from :py:meth:`~scranpy.dimensionality_reduction.run_pca.run_pca`. - options: Optional parameters. + + options: + Optional parameters. 
Returns: - NeighborIndex: Nearest neighbor search index. + Nearest neighbor search index. """ if not input.flags.c_contiguous: raise ValueError("expected 'input' to have row-major layout") diff --git a/src/scranpy/nearest_neighbors/find_nearest_neighbors.py b/src/scranpy/nearest_neighbors/find_nearest_neighbors.py index 3266ff3..e73edb2 100644 --- a/src/scranpy/nearest_neighbors/find_nearest_neighbors.py +++ b/src/scranpy/nearest_neighbors/find_nearest_neighbors.py @@ -76,7 +76,8 @@ def num_neighbors(self) -> int: def get(self, i: int) -> SingleNeighborResults: """ Args: - i: Index of the cell of interest. + i: + Index of the cell of interest. Returns: A tuple with indices and distances to the nearest neighbors for cell ``i``. @@ -112,11 +113,12 @@ def unserialize(cls, content: SerializedNeighborResults) -> "NeighborResults": """Initialize an instance of this class from serialized nearest neighbor results. Args: - content: Result of + content: + Result of :py:meth:`~scranpy.nearest_neighbors.find_nearest_neighbors.NeighborResults.serialize`. Returns: - NeighborResults: Instance of this class, constructed from the data in ``content``. + Instance of this class, constructed from the data in ``content``. """ idx = content.index if not idx.flags.c_contiguous: @@ -139,7 +141,8 @@ class FindNearestNeighborsOptions: """Optional arguments for :py:meth:`~scranpy.nearest_neighbors.find_nearest_neighbors.find_nearest_neighbors`. Attributes: - num_threads: Number of threads to use. Defaults to 1. + num_threads: + Number of threads to use. Defaults to 1. """ num_threads: int = 1 @@ -153,13 +156,19 @@ def find_nearest_neighbors( """Find the nearest neighbors for each cell. Args: - idx: The nearest neighbor search index, usually built by + idx: + The nearest neighbor search index, usually built by :py:meth:`~scranpy.nearest_neighbors.build_neighbor_index.build_neighbor_index`. - k: Number of neighbors to find for each cell. - options: Optional parameters. + + k: + Number of neighbors to find for each cell. + + options: + Optional parameters. Raises: - TypeError: If ``idx`` is not a nearest neighbor index. + TypeError: + If ``idx`` is not a nearest neighbor index. Returns: Object containing the ``k`` nearest neighbors for each cell. diff --git a/src/scranpy/normalization/center_size_factors.py b/src/scranpy/normalization/center_size_factors.py index e4dfd94..ef30610 100644 --- a/src/scranpy/normalization/center_size_factors.py +++ b/src/scranpy/normalization/center_size_factors.py @@ -32,12 +32,14 @@ class CenterSizeFactorsOptions: This argument is ignored if the input ``size_factors`` are not double-precision, in which case a new array is always returned. - allow_zeros: Whether to gracefully handle zero size factors. + allow_zeros: + Whether to gracefully handle zero size factors. If True, zero size factors are automatically set to the smallest non-zero size factor. If False, an error is raised. Defaults to False. - allow_non_finite: Whether to gracefully handle missing or infinite size factors. + allow_non_finite: + Whether to gracefully handle missing or infinite size factors. If True, infinite size factors are automatically set to the largest non-zero size factor, while missing values are automatically set to 1. If False, an error is raised. @@ -60,10 +62,12 @@ def center_size_factors( size_factors: Floating-point array containing size factors for all cells. - options: Optional parameters. + options: + Optional parameters. Raises: - TypeError, ValueError: If arguments don't meet expectations. 
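The neighbor-search workflow above can be sketched end to end, assuming the functions are importable from scranpy.nearest_neighbors (where they are defined); the PC matrix is fabricated and made row-major to satisfy the c_contiguous check in build_neighbor_index().

import numpy
from scranpy.nearest_neighbors import (
    BuildNeighborIndexOptions,
    FindNearestNeighborsOptions,
    build_neighbor_index,
    find_nearest_neighbors,
)

# Rows are cells, columns are dimensions (e.g. top principal components).
pcs = numpy.ascontiguousarray(numpy.random.rand(200, 20))

idx = build_neighbor_index(pcs, options=BuildNeighborIndexOptions(approximate=True))
res = find_nearest_neighbors(idx, k=10, options=FindNearestNeighborsOptions(num_threads=2))

indices, distances = res.get(0)  # nearest neighbors of the first cell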
+ TypeError, ValueError: + If arguments don't meet expectations. Returns: Array containing centered size factors. diff --git a/src/scranpy/normalization/grouped_size_factors.py b/src/scranpy/normalization/grouped_size_factors.py index b7b937b..4e92153 100644 --- a/src/scranpy/normalization/grouped_size_factors.py +++ b/src/scranpy/normalization/grouped_size_factors.py @@ -1,9 +1,10 @@ from dataclasses import dataclass from typing import Optional, Sequence, Union -from numpy import ndarray, float64, array -from .._utils import MatrixTypes, tatamize_input, factorize, process_block +from numpy import array, float64, ndarray + from .. import _cpphelpers as lib +from .._utils import MatrixTypes, factorize, process_block, tatamize_input @dataclass @@ -14,11 +15,11 @@ class GroupedSizeFactorsOptions: groups: Sequence of group assignments, of length equal to the number of cells. - rank: + rank: Number of principal components to obtain in the low-dimensional representation prior to clustering. Only used if ``clusters`` is None. - block: + block: Sequence of block assignments, where PCA and clustering is performed within each block. Only used if ``clusters`` is None. @@ -32,6 +33,7 @@ class GroupedSizeFactorsOptions: num_threads: Number of threads to use for the various calculations. """ + rank: int = 25 groups: Optional[Sequence] = None block: Optional[Sequence] = None @@ -40,13 +42,15 @@ class GroupedSizeFactorsOptions: num_threads: int = 1 -def grouped_size_factors(input: MatrixTypes, options: GroupedSizeFactorsOptions = GroupedSizeFactorsOptions()) -> ndarray: +def grouped_size_factors( + input: MatrixTypes, options: GroupedSizeFactorsOptions = GroupedSizeFactorsOptions() +) -> ndarray: """Compute grouped size factors to remove composition biases between groups of cells. This sums all cells from the same group into a pseudo-cell, applies median-based normalization between pseudo-cells, and propagates the pseudo-cell size factors back to each cell via library size scaling. Args: - input: + input: Matrix-like object where rows are features and columns are cells, typically containing expression values of some kind. This should be a matrix class that can be converted into a :py:class:`~mattress.TatamiNumericPointer.TatamiNumericPointer`. 
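A small sketch of center_size_factors() with the options described above; the factors are fabricated and include a zero that is rescued by allow_zeros=True (assuming the names are importable from scranpy.normalization, where they are defined).

import numpy
from scranpy.normalization import CenterSizeFactorsOptions, center_size_factors

sf = numpy.array([0.5, 1.5, 0.0, 2.0], dtype=numpy.float64)

centered = center_size_factors(
    sf,
    options=CenterSizeFactorsOptions(allow_zeros=True, allow_non_finite=True),
)
# The zero entry is replaced by the smallest non-zero factor before centering,
# so downstream normalized values remain on a comparable scale across cells.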
@@ -68,28 +72,36 @@ def grouped_size_factors(input: MatrixTypes, options: GroupedSizeFactorsOptions if options.groups is not None: group_levels, group_indices = factorize(options.groups) if len(group_indices) != ptr.ncol(): - raise ValueError("length of 'options.groups' should be equal to number of cells in 'input'") - lib.grouped_size_factors_with_clusters(ptr.ptr, group_indices, output, options.num_threads) + raise ValueError( + "length of 'options.groups' should be equal to number of cells in 'input'" + ) + lib.grouped_size_factors_with_clusters( + ptr.ptr, group_indices, output, options.num_threads + ) else: - use_block, num_blocks, block_names, block_info, block_offset = process_block(options.block, ptr.ncol()) + use_block, num_blocks, block_names, block_info, block_offset = process_block( + options.block, ptr.ncol() + ) use_init_sf = options.initial_factors is not None init_sf_info = None init_sf_offset = 0 if use_init_sf: if len(options.initial_factors) != ptr.ncol(): - raise ValueError("length of 'options.initial_factors' should be equal to the number of cells in 'input'") + raise ValueError( + "length of 'options.initial_factors' should be equal to the number of cells in 'input'" + ) init_sf_info = array(options.initial_factors, copy=False, dtype=float64) init_sf_offset = init_sf_info.ctypes.data lib.grouped_size_factors_without_clusters( - ptr.ptr, - use_block, - block_offset, + ptr.ptr, + use_block, + block_offset, use_init_sf, init_sf_offset, - options.rank, - output, + options.rank, + output, options.num_threads, ) diff --git a/src/scranpy/normalization/log_norm_counts.py b/src/scranpy/normalization/log_norm_counts.py index e43f381..4f84fb6 100644 --- a/src/scranpy/normalization/log_norm_counts.py +++ b/src/scranpy/normalization/log_norm_counts.py @@ -1,15 +1,15 @@ from __future__ import annotations +from copy import copy from dataclasses import dataclass, field from typing import Optional, Sequence, Union -from mattress import TatamiNumericPointer, tatamize -from numpy import float64, ndarray, log1p, log from delayedarray import DelayedArray -from copy import copy +from mattress import TatamiNumericPointer, tatamize +from numpy import array, float64, log, log1p, ndarray from .. import _cpphelpers as lib -from .center_size_factors import center_size_factors, CenterSizeFactorsOptions +from .center_size_factors import CenterSizeFactorsOptions, center_size_factors __author__ = "ltla, jkanche" __copyright__ = "ltla, jkanche" @@ -21,26 +21,31 @@ class LogNormCountsOptions: """Optional arguments for :py:meth:`~scranpy.normalization.log_norm_counts.log_norm_counts`. Attributes: - size_factors: Size factors for each cell. + size_factors: + Size factors for each cell. Defaults to None, in which case the library sizes are used. - delayed: Whether to force the log-normalization to be + delayed: + Whether to force the log-normalization to be delayed. This reduces memory usage by avoiding unnecessary copies of the count matrix. - center: Whether to center the size factors. Defaults to True. + center: + Whether to center the size factors. Defaults to True. center_size_factors_options: Optional arguments to pass to :py:meth:`~scranpy.normalization.center_size_factors.center_size_factors` if ``center = True``. - with_size_factors: Whether to return the (possibly centered) size factors in the output. + with_size_factors: + Whether to return the (possibly centered) size factors in the output. 
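The pseudo-cell strategy above can be sketched with fabricated counts: with groups supplied, each group is summed into a pseudo-cell and median-normalized, while without groups the rank and block settings drive an internal PCA and clustering (import path assumed as for the other normalization helpers).

import numpy
from scranpy.normalization import GroupedSizeFactorsOptions, grouped_size_factors

counts = numpy.random.poisson(1.0, size=(1000, 300)).astype(numpy.float64)
clusters = (numpy.random.rand(300) * 5).astype(numpy.int32)

# With known groups: size factors are propagated back from the pseudo-cells.
sf = grouped_size_factors(counts, options=GroupedSizeFactorsOptions(groups=clusters))

# Without groups: clustering is performed internally on a rank-25 PCA.
sf_auto = grouped_size_factors(counts, options=GroupedSizeFactorsOptions(rank=25))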
assay_type: Assay to use from ``input`` if it is a :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`. - num_threads: Number of threads to use to compute size factors, + num_threads: + Number of threads to use to compute size factors, if none are provided in ``size_factors``. Defaults to 1. """ @@ -73,10 +78,12 @@ def log_norm_counts(input, options: LogNormCountsOptions = LogNormCountsOptions( Developers may also provide a :py:class:`~mattress.TatamiNumericPointer.TatamiNumericPointer` directly. - options: Optional parameters. + options: + Optional parameters. Raises: - TypeError, ValueError: If arguments don't meet expectations. + TypeError, ValueError: + If arguments don't meet expectations. Returns: If `options.with_size_factors = False`, the log-normalized matrix is diff --git a/src/scranpy/quality_control/_utils.py b/src/scranpy/quality_control/_utils.py index 6e99051..54458da 100644 --- a/src/scranpy/quality_control/_utils.py +++ b/src/scranpy/quality_control/_utils.py @@ -1,7 +1,9 @@ -from .._utils import match_lists, create_pointer_array -from numpy import float64, ndarray, array -from typing import Tuple, Optional, Union +from typing import Optional, Tuple, Union + from biocframe import BiocFrame +from numpy import array, float64, ndarray + +from .._utils import create_pointer_array, match_lists def process_subset_columns(subsets: BiocFrame) -> Tuple[list[ndarray], ndarray]: @@ -44,12 +46,11 @@ def check_custom_thresholds( "number of rows in 'custom_thresholds' should equal the number of blocks" ) - if num_blocks > 1 and custom_thresholds.rownames != block_names: - m = match_lists(block_names, custom_thresholds.rownames) + if num_blocks > 1 and custom_thresholds.row_names != block_names: + m = match_lists(block_names, custom_thresholds.row_names) if m is None: raise ValueError( "row names of 'custom_thresholds' should equal the unique values of 'block'" ) - custom_thresholds = custom_thresholds[m, :] - + custom_thresholds = custom_thresholds[list(m), :] return custom_thresholds diff --git a/src/scranpy/quality_control/create_adt_qc_filter.py b/src/scranpy/quality_control/create_adt_qc_filter.py index 9b02902..b3ca081 100644 --- a/src/scranpy/quality_control/create_adt_qc_filter.py +++ b/src/scranpy/quality_control/create_adt_qc_filter.py @@ -2,7 +2,7 @@ from typing import Optional, Sequence from biocframe import BiocFrame -from numpy import bool_, float64, int32, ndarray, zeros, uint8 +from numpy import bool_, float64, int32, ndarray, uint8, zeros from .. import _cpphelpers as lib from .._utils import process_block @@ -35,16 +35,19 @@ def create_adt_qc_filter( """Defines a filtering vector based on the RNA-derived per-cell quality control (QC) metrics and thresholds. Args: - metrics: Data frame of metrics, + metrics: + Data frame of metrics, see :py:meth:`~scranpy.quality_control.adt.per_cell_adt_qc_metrics` for the expected format. - thresholds: Data frame of filter thresholds, + thresholds: + Data frame of filter thresholds, see :py:meth:`~scranpy.quality_control.adt.suggest_adt_qc_filters` for the expected format. - options: Optional parameters. + options: + Optional parameters. Returns: - A boolean array where True entries mark the cells to be discarded. + A boolean array where True entries mark the cells to be discarded. 
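A hedged sketch of log_norm_counts() with the options above; the counts are fabricated, the import path is assumed as before, and the exact structure returned when with_size_factors=True should be checked against the Returns section of the docstring.

import numpy
from scranpy.normalization import LogNormCountsOptions, log_norm_counts

counts = numpy.random.poisson(2.0, size=(1000, 300)).astype(numpy.float64)

# Library sizes are used when no size factors are supplied; delayed=True wraps
# the result in a DelayedArray instead of materializing a normalized copy.
lognorm = log_norm_counts(counts, options=LogNormCountsOptions(center=True, delayed=True))

# Request the (possibly centered) size factors alongside the matrix.
lognorm_with_sf = log_norm_counts(
    counts,
    options=LogNormCountsOptions(with_size_factors=True),
)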
""" if not isinstance(metrics, BiocFrame): diff --git a/src/scranpy/quality_control/create_crispr_qc_filter.py b/src/scranpy/quality_control/create_crispr_qc_filter.py index ad339f3..84960e0 100644 --- a/src/scranpy/quality_control/create_crispr_qc_filter.py +++ b/src/scranpy/quality_control/create_crispr_qc_filter.py @@ -2,7 +2,7 @@ from typing import Optional, Sequence from biocframe import BiocFrame -from numpy import bool_, float64, ndarray, zeros, uint8 +from numpy import bool_, float64, ndarray, uint8, zeros from .. import _cpphelpers as lib from .._utils import process_block @@ -34,18 +34,21 @@ def create_crispr_qc_filter( """Defines a filtering vector based on the RNA-derived per-cell quality control (QC) metrics and thresholds. Args: - metrics: Data frame of metrics, + metrics: + Data frame of metrics, see :py:meth:`~scranpy.quality_control.per_cell_crispr_qc_metrics.per_cell_crispr_qc_metrics` for the expected format. - thresholds: Data frame of filter thresholds, + thresholds: + Data frame of filter thresholds, see :py:meth:`~scranpy.quality_control.suggest_crispr_qc_filters.suggest_crispr_qc_filters` for the expected format. - options: Optional parameters. + options: + Optional parameters. Returns: - A boolean array where True entries mark the cells to be discarded. + A boolean array where True entries mark the cells to be discarded. """ if not isinstance(metrics, BiocFrame): diff --git a/src/scranpy/quality_control/create_rna_qc_filter.py b/src/scranpy/quality_control/create_rna_qc_filter.py index e47f569..33524b7 100644 --- a/src/scranpy/quality_control/create_rna_qc_filter.py +++ b/src/scranpy/quality_control/create_rna_qc_filter.py @@ -2,7 +2,7 @@ from typing import Optional, Sequence from biocframe import BiocFrame -from numpy import bool_, float64, int32, ndarray, zeros, uint8, array +from numpy import array, bool_, float64, int32, ndarray, uint8, zeros from .. import _cpphelpers as lib from .._utils import process_block @@ -35,16 +35,19 @@ def create_rna_qc_filter( """Defines a filtering vector based on the RNA-derived per-cell quality control (QC) metrics and thresholds. Args: - metrics: Data frame of metrics, + metrics: + Data frame of metrics, see :py:meth:`~scranpy.quality_control.rna.per_cell_rna_qc_metrics` for the expected format. - thresholds: Data frame of filter thresholds, + thresholds: + Data frame of filter thresholds, see :py:meth:`~scranpy.quality_control.rna.suggest_rna_qc_filters` for the expected format. - options: Optional parameters. + options: + Optional parameters. - Returns: - A boolean array where True entries mark the cells to be discarded. + Returns: + A boolean array where True entries mark the cells to be discarded. """ if not isinstance(metrics, BiocFrame): diff --git a/src/scranpy/quality_control/filter_cells.py b/src/scranpy/quality_control/filter_cells.py index c810077..2a090ea 100644 --- a/src/scranpy/quality_control/filter_cells.py +++ b/src/scranpy/quality_control/filter_cells.py @@ -1,9 +1,9 @@ from dataclasses import dataclass +from typing import Sequence, Union -from mattress import TatamiNumericPointer -from numpy import logical_or, logical_and, zeros, ones, uint8, array from delayedarray import DelayedArray -from typing import Union, Sequence +from mattress import TatamiNumericPointer +from numpy import array, logical_and, logical_or, ones, uint8, zeros from .. 
import _cpphelpers as lib from .._utils import to_logical @@ -18,11 +18,13 @@ class FilterCellsOptions: """Optional arguments for :py:meth:`~scranpy.quality_control.filter_cells.filter_cells`. Attributes: - discard: Whether to discard the cells listed in ``filter``. + discard: + Whether to discard the cells listed in ``filter``. If False, the specified cells are retained instead, and all other cells are discarded. Defaults to True. - intersect: Whether to take the intersection or union of + intersect: + Whether to take the intersection or union of multiple ``filter`` arrays, to create a combined filtering array. Note that this is orthogonal to ``discard``. @@ -30,7 +32,8 @@ class FilterCellsOptions: Whether to return a vector specifying which cells are to be retained. - delayed: Whether to force the filtering operation to be + delayed: + Whether to force the filtering operation to be delayed. This reduces memory usage by avoiding unnecessary copies of the count matrix. """ @@ -41,7 +44,6 @@ class FilterCellsOptions: delayed: bool = True - def filter_cells( input, filter: Union[Sequence[int], Sequence[bool], tuple], @@ -65,7 +67,8 @@ def filter_cells( Alternatively, a tuple of such arrays, to be combined into a single filtering vector according to ``options.intersect``. - options: Optional parameters. + options: + Optional parameters. Returns: If ``options.with_retain_vector = False``, the filtered matrix is diff --git a/src/scranpy/quality_control/guess_mito_from_symbols.py b/src/scranpy/quality_control/guess_mito_from_symbols.py index 92a8966..797c8d1 100644 --- a/src/scranpy/quality_control/guess_mito_from_symbols.py +++ b/src/scranpy/quality_control/guess_mito_from_symbols.py @@ -5,8 +5,11 @@ def guess_mito_from_symbols(symbols: Sequence[str], prefix: str) -> Sequence[int """Guess mitochondrial genes from their gene symbols. Args: - symbols: List of gene symbols. - prefix: Case-insensitive prefix to guess mitochondrial genes. + symbols: + List of gene symbols. + + prefix: + Case-insensitive prefix to guess mitochondrial genes. Return: List of integer indices for the guessed mitochondrial genes. diff --git a/src/scranpy/quality_control/per_cell_adt_qc_metrics.py b/src/scranpy/quality_control/per_cell_adt_qc_metrics.py index dd27ca0..6e523af 100644 --- a/src/scranpy/quality_control/per_cell_adt_qc_metrics.py +++ b/src/scranpy/quality_control/per_cell_adt_qc_metrics.py @@ -1,11 +1,11 @@ from dataclasses import dataclass -from typing import Mapping, Optional, Union, Sequence +from typing import Mapping, Optional, Sequence, Union from biocframe import BiocFrame from numpy import float64, int32, ndarray from .. import _cpphelpers as lib -from .._utils import to_logical, tatamize_input, create_pointer_array, MatrixTypes +from .._utils import MatrixTypes, create_pointer_array, tatamize_input, to_logical from ._utils import create_subset_buffers, create_subset_frame @@ -14,7 +14,8 @@ class PerCellAdtQcMetricsOptions: """Optional arguments for :py:meth:`~scranpy.quality_control.per_cell_adt_qc_metrics.per_cell_adt_qc_metrics`. Attributes: - subsets: Dictionary of feature subsets. + subsets: + Dictionary of feature subsets. Each key is the name of the subset and each value is an array. Each array may contain integer indices to the rows of `input` belonging to the subset. @@ -31,7 +32,8 @@ class PerCellAdtQcMetricsOptions: Sequence of cell names of length equal to the number of columns in ``input``. If provided, this is used as the row names of the output data frames. 
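Taken together, the QC helpers documented above form a small pipeline. This sketch fabricates counts and gene symbols, and assumes the functions are importable from scranpy.quality_control (they are defined in the modules shown in this diff) and that the options classes accept the fields listed in their docstrings.

import numpy
from scranpy.quality_control import (
    create_rna_qc_filter,
    filter_cells,
    guess_mito_from_symbols,
    per_cell_rna_qc_metrics,
    suggest_rna_qc_filters,
)
from scranpy.quality_control.per_cell_rna_qc_metrics import PerCellRnaQcMetricsOptions

counts = numpy.random.poisson(2.0, size=(100, 300)).astype(numpy.float64)
symbols = ["MT-CO1", "MT-ND1"] + ["GENE" + str(i) for i in range(98)]

mito = guess_mito_from_symbols(symbols, "mt-")  # case-insensitive prefix -> [0, 1]

metrics = per_cell_rna_qc_metrics(
    counts,
    options=PerCellRnaQcMetricsOptions(subsets={"mito": mito}),
)
thresholds = suggest_rna_qc_filters(metrics)
discard = create_rna_qc_filter(metrics, thresholds)  # True marks a cell to drop

filtered = filter_cells(counts, discard)  # discards by default; delayed by default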
- num_threads: Number of threads to use. Defaults to 1. + num_threads: + Number of threads to use. Defaults to 1. """ subsets: Optional[Mapping] = None @@ -50,7 +52,8 @@ def per_cell_adt_qc_metrics( for each cell for diagnostic purposes. Args: - input: Matrix-like object where rows are features and columns are cells, typically containing + input: + Matrix-like object where rows are features and columns are cells, typically containing expression values of some kind. This should be a matrix class that can be converted into a :py:class:`~mattress.TatamiNumericPointer.TatamiNumericPointer`. @@ -59,10 +62,12 @@ def per_cell_adt_qc_metrics( Developers may also provide a :py:class:`~mattress.TatamiNumericPointer.TatamiNumericPointer` directly. - options: Optional parameters. + options: + Optional parameters. Raises: - TypeError: If ``input`` is not an expected matrix type. + TypeError: + If ``input`` is not an expected matrix type. Returns: A data frame containing one row per cell and the following fields - diff --git a/src/scranpy/quality_control/per_cell_crispr_qc_metrics.py b/src/scranpy/quality_control/per_cell_crispr_qc_metrics.py index df22216..d09f270 100644 --- a/src/scranpy/quality_control/per_cell_crispr_qc_metrics.py +++ b/src/scranpy/quality_control/per_cell_crispr_qc_metrics.py @@ -1,11 +1,11 @@ from dataclasses import dataclass +from typing import Optional, Sequence, Union from biocframe import BiocFrame from numpy import float64, int32, ndarray -from typing import Union, Sequence, Optional from .. import _cpphelpers as lib -from .._utils import tatamize_input, MatrixTypes +from .._utils import MatrixTypes, tatamize_input @dataclass @@ -21,7 +21,8 @@ class PerCellCrisprQcMetricsOptions: Sequence of cell names of length equal to the number of columns in ``input``. If provided, this is used as the row names of the output data frames. - num_threads: Number of threads to use. Defaults to 1. + num_threads: + Number of threads to use. Defaults to 1. """ assay_type: Union[int, str] = 0 @@ -40,7 +41,8 @@ def per_cell_crispr_qc_metrics( identity of the most abundant guide is also reported. Args: - input: Matrix-like object where rows are features and columns are cells, typically containing + input: + Matrix-like object where rows are features and columns are cells, typically containing expression values of some kind. This should be a matrix class that can be converted into a :py:class:`~mattress.TatamiNumericPointer.TatamiNumericPointer`. @@ -49,10 +51,12 @@ def per_cell_crispr_qc_metrics( Developers may also provide a :py:class:`~mattress.TatamiNumericPointer.TatamiNumericPointer` directly. - options: Optional parameters. + options: + Optional parameters. Raises: - TypeError: If ``input`` is not an expected matrix type. + TypeError: + If ``input`` is not an expected matrix type. Returns: A data frame containing one row per cell and the following fields - diff --git a/src/scranpy/quality_control/per_cell_rna_qc_metrics.py b/src/scranpy/quality_control/per_cell_rna_qc_metrics.py index 2ea8281..cd45cf4 100644 --- a/src/scranpy/quality_control/per_cell_rna_qc_metrics.py +++ b/src/scranpy/quality_control/per_cell_rna_qc_metrics.py @@ -1,11 +1,11 @@ from dataclasses import dataclass -from typing import Mapping, Optional, Union, Sequence +from typing import Mapping, Optional, Sequence, Union from biocframe import BiocFrame from numpy import float64, int32, ndarray from .. 
import _cpphelpers as lib -from .._utils import to_logical, tatamize_input, MatrixTypes, create_pointer_array +from .._utils import MatrixTypes, create_pointer_array, tatamize_input, to_logical from ._utils import create_subset_buffers, create_subset_frame @@ -14,7 +14,8 @@ class PerCellRnaQcMetricsOptions: """Optional arguments for :py:meth:`~scranpy.quality_control.per_cell_rna_qc_metrics.per_cell_rna_qc_metrics`. Attributes: - subsets: Dictionary of feature subsets. + subsets: + Dictionary of feature subsets. Each key is the name of the subset and each value is an array. Each array may contain integer indices to the rows of `input` belonging to the subset. @@ -31,7 +32,8 @@ class PerCellRnaQcMetricsOptions: Sequence of cell names of length equal to the number of columns in ``input``. If provided, this is used as the row names of the output data frames. - num_threads: Number of threads to use. Defaults to 1. + num_threads: + Number of threads to use. Defaults to 1. """ subsets: Optional[Mapping] = None @@ -50,7 +52,8 @@ def per_cell_rna_qc_metrics( feature subsets, typically mitochondrial genes where high values are indicative of cell damage. Args: - input: Matrix-like object where rows are features and columns are cells, typically containing + input: + Matrix-like object where rows are features and columns are cells, typically containing expression values of some kind. This should be a matrix class that can be converted into a :py:class:`~mattress.TatamiNumericPointer.TatamiNumericPointer`. @@ -59,10 +62,12 @@ def per_cell_rna_qc_metrics( Developers may also provide a :py:class:`~mattress.TatamiNumericPointer.TatamiNumericPointer` directly. - options: Optional parameters. + options: + Optional parameters. Raises: - TypeError: If ``input`` is not an expected matrix type. + TypeError: + If ``input`` is not an expected matrix type. Returns: A data frame containing one row per cell and the following fields - diff --git a/src/scranpy/quality_control/suggest_adt_qc_filters.py b/src/scranpy/quality_control/suggest_adt_qc_filters.py index b1f1034..6a366d2 100644 --- a/src/scranpy/quality_control/suggest_adt_qc_filters.py +++ b/src/scranpy/quality_control/suggest_adt_qc_filters.py @@ -2,15 +2,15 @@ from typing import Optional, Sequence from biocframe import BiocFrame -from numpy import float64, int32, ndarray, array +from numpy import array, float64, int32, ndarray from .. import _cpphelpers as lib from .._utils import process_block from ._utils import ( - process_subset_columns, check_custom_thresholds, create_subset_buffers, create_subset_frame, + process_subset_columns, ) @@ -60,14 +60,17 @@ def suggest_adt_qc_filters( quality and should be removed before further analysis. Args: - metrics: A data frame containing QC metrics for each cell, + metrics: + A data frame containing QC metrics for each cell, see the output of :py:meth:`~scranpy.quality_control.per_cell_adt_qc_metrics.per_cell_adt_qc_metrics` for the expected format. - options: Optional parameters. + options: + Optional parameters. Raises: - ValueError, TypeError: if provided ``inputs`` are incorrect type or do + ValueError, TypeError: + If provided ``inputs`` are incorrect type or do not contain expected metrics. 
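The ADT metrics and filter-suggestion functions follow the same pattern; a sketch with fabricated ADT counts and an IgG-control subset, with the output columns accessed as in the updated test_suggest_adt_qc_filters tests (import paths assumed as above).

import numpy
from scranpy.quality_control import per_cell_adt_qc_metrics, suggest_adt_qc_filters
from scranpy.quality_control.per_cell_adt_qc_metrics import PerCellAdtQcMetricsOptions

adt_counts = numpy.random.poisson(5.0, size=(50, 300)).astype(numpy.float64)

adt_metrics = per_cell_adt_qc_metrics(
    adt_counts,
    options=PerCellAdtQcMetricsOptions(subsets={"igg": [0, 1, 2]}),  # integer row indices
)

adt_thresholds = suggest_adt_qc_filters(adt_metrics)
print(list(adt_thresholds.column("detected")))                     # one threshold per block
print(list(adt_thresholds.column("subset_totals").column("igg")))  # nested per-subset frame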
Returns: diff --git a/src/scranpy/quality_control/suggest_crispr_qc_filters.py b/src/scranpy/quality_control/suggest_crispr_qc_filters.py index 74fe8ba..a14b8cb 100644 --- a/src/scranpy/quality_control/suggest_crispr_qc_filters.py +++ b/src/scranpy/quality_control/suggest_crispr_qc_filters.py @@ -54,14 +54,17 @@ def suggest_crispr_qc_filters( low counts due to failed transfection. (Multiple transfections are not considered undesirable at this point.) Args: - metrics: A data frame containing QC metrics for each cell, + metrics: + A data frame containing QC metrics for each cell, see the output of :py:meth:`~scranpy.quality_control.per_cell_crispr_qc_metrics.per_cell_crispr_qc_metrics` for the expected format. - options: Optional parameters. + options: + Optional parameters. Raises: - ValueError, TypeError: if provided ``inputs`` are incorrect type or do + ValueError, TypeError: + If provided ``inputs`` are incorrect type or do not contain expected metrics. Returns: diff --git a/src/scranpy/quality_control/suggest_rna_qc_filters.py b/src/scranpy/quality_control/suggest_rna_qc_filters.py index 5cb9c81..82f7dfa 100644 --- a/src/scranpy/quality_control/suggest_rna_qc_filters.py +++ b/src/scranpy/quality_control/suggest_rna_qc_filters.py @@ -2,15 +2,15 @@ from typing import Optional, Sequence from biocframe import BiocFrame -from numpy import float64, int32, ndarray, array +from numpy import array, float64, int32, ndarray from .. import _cpphelpers as lib from .._utils import process_block from ._utils import ( - process_subset_columns, check_custom_thresholds, create_subset_buffers, create_subset_frame, + process_subset_columns, ) @@ -59,13 +59,16 @@ def suggest_rna_qc_filters( removed before further analysis. Args: - metrics: A data frame containing QC metrics for each cell, + metrics: + A data frame containing QC metrics for each cell, see the output of :py:meth:`~scranpy.quality_control.rna.per_cell_rna_qc_metrics` for the expected format. - options: Optional parameters. + options: + Optional parameters. Raises: - ValueError, TypeError: if provided ``inputs`` are incorrect type or do + ValueError, TypeError: + If provided ``inputs`` are incorrect type or do not contain expected metrics. Returns: diff --git a/tests/test_aggregate_across_cells.py b/tests/test_aggregate_across_cells.py index 9ac9b11..fc977fc 100644 --- a/tests/test_aggregate_across_cells.py +++ b/tests/test_aggregate_across_cells.py @@ -8,7 +8,7 @@ def test_aggregate_across_cells_simple(): x = numpy.round(numpy.random.rand(1000, 8) * 5) y = scranpy.aggregate_across_cells(x, groups) - assert y.col_data.column("factor_1") == ["A", "B", "C", "D"] + assert list(y.col_data.column("factor_1")) == ["A", "B", "C", "D"] assert list(y.col_data.column("counts")) == [3, 2, 2, 1] obs_a = y.assay("sums")[:, 0] @@ -27,7 +27,7 @@ def test_aggregate_across_cells_combinations(): y = scranpy.aggregate_across_cells(x, {"group": groups, "batch": batches}) assert y.col_data.column("group") == ["A", "A", "B", "B"] - assert y.col_data.column("batch") == [1, 2, 1, 2] + assert y.col_data.column("batch") == ["1", "2", "1", "2"] assert list(y.col_data.column("counts")) == [2, 2, 2, 2] obs_a = y.assay("sums")[:, 1] @@ -41,10 +41,10 @@ def test_aggregate_across_cells_combinations(): # Try out different input types. 
     y2 = scranpy.aggregate_across_cells(x, (groups, batches))
     assert y2.col_data.column("factor_1") == ["A", "A", "B", "B"]
-    assert y2.col_data.column("factor_2") == [1, 2, 1, 2]
+    assert y2.col_data.column("factor_2") == ["1", "2", "1", "2"]
     assert (y2.assay("sums") == y.assay("sums")).all()

     y2 = scranpy.aggregate_across_cells(x, BiocFrame({"g": groups, "b": batches}))
     assert y2.col_data.column("g") == ["A", "A", "B", "B"]
-    assert y2.col_data.column("b") == [1, 2, 1, 2]
+    assert y2.col_data.column("b") == ["1", "2", "1", "2"]
     assert (y2.assay("detected") == y.assay("detected")).all()
diff --git a/tests/test_analyze.py b/tests/test_analyze.py
index 7fce5d4..0bbf5d9 100644
--- a/tests/test_analyze.py
+++ b/tests/test_analyze.py
@@ -136,4 +136,4 @@ def test_analyze_summarizedexperiment(mock_data):
     sce.alternative_experiments = { "adt": adt_se }
     out = analyze_sce(sce, adt_exp = "adt", assay_type="counts")
     assert out.gene_variances.row_names == se.row_names
-    assert out.adt_markers[0].row_names == adt_se.row_names
+    assert out.adt_markers["0"].row_names == adt_se.row_names
diff --git a/tests/test_score_markers.py b/tests/test_score_markers.py
index 4d636cc..a6b9ec3 100644
--- a/tests/test_score_markers.py
+++ b/tests/test_score_markers.py
@@ -18,29 +18,29 @@ def test_score_markers(mock_data):
     res = score_markers(out, grouping=grouping)
     assert res is not None
-    assert "means" in res[1].columns
-    assert isinstance(res[1].column("delta_detected"), BiocFrame)
+    assert "means" in res["1"].columns
+    assert isinstance(res["1"].column("delta_detected"), BiocFrame)

     # Works when blocks are supplied.
     resb = score_markers(
         out, grouping=grouping, options=ScoreMarkersOptions(block=mock_data.block)
     )
     assert resb is not None
-    assert "detected" in resb[1].columns
-    assert isinstance(resb[1].column("lfc"), BiocFrame)
+    assert "detected" in resb["1"].columns
+    assert isinstance(resb["1"].column("lfc"), BiocFrame)

     # Same results in parallel.
     resp = score_markers(
         out, grouping=grouping, options=ScoreMarkersOptions(num_threads=3)
     )
-    assert (res[0].column("means") == resp[0].column("means")).all()
+    assert (res["0"].column("means") == resp["0"].column("means")).all()
     assert (
-        res[1].column("cohen").column("mean") == resp[1].column("cohen").column("mean")
+        res["1"].column("cohen").column("mean") == resp["1"].column("cohen").column("mean")
     ).all()
     assert (
-        res[2].column("auc").column("min_rank")
-        == resp[2].column("auc").column("min_rank")
+        res["2"].column("auc").column("min_rank")
+        == resp["2"].column("auc").column("min_rank")
     ).all()
     assert (
-        res[3].column("lfc").column("min") == resp[3].column("lfc").column("min")
+        res["3"].column("lfc").column("min") == resp["3"].column("lfc").column("min")
     ).all()
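Finally, a condensed sketch of aggregate_across_cells() in the style of the tests above; the grouping vector is fabricated with the same level counts as the test data, and combined factors are reported as strings after the biocframe update.

import numpy
import scranpy

x = numpy.round(numpy.random.rand(1000, 8) * 5)
groups = ["A", "B", "A", "B", "C", "A", "D", "C"]
batches = [1, 2, 1, 2, 1, 2, 1, 2]

y = scranpy.aggregate_across_cells(x, groups)
print(list(y.col_data.column("factor_1")))  # unique levels, e.g. ["A", "B", "C", "D"]
print(list(y.col_data.column("counts")))    # number of cells per level
sums = y.assay("sums")                      # one pseudo-bulk column per level

# Combinations of factors can be supplied as a dict; levels of non-string
# factors are reported as strings (e.g. "1", "2").
y2 = scranpy.aggregate_across_cells(x, {"group": groups, "batch": batches})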