Overhauled the documentation, streamlined the type hinting.

SingleR-inc · Sep 20, 2023 · 46ce6dc · 46ce6dc
1 parent a4d3ee0
commit 46ce6dc
Show file tree

Hide file tree

Showing 12 changed files with 118 additions and 135 deletions.
diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml
@@ -29,7 +29,8 @@ jobs:
           python -m pip install --upgrade pip setuptools
           DEPENDENCIES=$(python -c 'from setuptools.config.setupcfg import read_configuration as c; a = c("setup.cfg"); print(" ".join(a["options"]["install_requires"][1:]))')
           pip install ${DEPENDENCIES}
-          pip install sphinx myst-parser[linkify] furo
+          DOCDEPENDENCIES=$(python -c 'with open("docs/requirements.txt") as a: available = list(a); print(" ".join(map(lambda x : x.strip(), filter(lambda x : not x.startswith("#"), available))))')
+          pip install ${DOCDEPENDENCIES}
 
       # Note that doc building requires the inplace shared library.
       - name: Build docs

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,7 +1,5 @@
 # Changelog
 
-## Version 0.1 (development)
+## Version 0.1.0
 
-- Feature A added
-- FIX: nasty bug #1729 fixed
-- add your changes here!
+- Added the `annotate_integrated()` function, which performs annotation across multiple references.
diff --git a/docs/conf.py b/docs/conf.py
@@ -72,6 +72,7 @@
     "sphinx.ext.ifconfig",
     "sphinx.ext.mathjax",
     "sphinx.ext.napoleon",
+    "sphinx_autodoc_typehints",
 ]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -169,7 +170,7 @@
 autodoc_default_options = {
     'special-members': True,
     'undoc-members': False,
-    'exclude-members': '__weakref__, __dict__, __str__, __module__, __init__'
+    'exclude-members': '__weakref__, __dict__, __str__, __module__, __init__, __del__'
 }
 
 autosummary_generate = True
@@ -308,6 +309,9 @@
     "scipy": ("https://docs.scipy.org/doc/scipy/reference", None),
     "setuptools": ("https://setuptools.pypa.io/en/stable/", None),
     "pyscaffold": ("https://pyscaffold.org/en/stable", None),
+    "biocframe": ("https://biocpy.github.io/BiocFrame", None),
+    "summarizedexperiment": ("https://biocpy.github.io/SummarizedExperiment", None),
+    "singlecellexperiment": ("https://biocpy.github.io/SingleCellExperiment", None),
 }
 
-print(f"loading configurations for {project} {version} ...", file=sys.stderr)
+print(f"loading configurations for {project} {version} ...", file=sys.stderr)
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -5,3 +5,4 @@ furo
 # sphinx_rtd_theme
 myst-parser[linkify]
 sphinx>=3.2.1
+sphinx-autodoc-typehints
diff --git a/src/singler/annotate_integrated.py b/src/singler/annotate_integrated.py
@@ -16,11 +16,11 @@ def annotate_integrated(
     ref_labels: Union[str, Sequence[Union[Sequence, str]]],
     ref_features: Union[str, Sequence[Union[Sequence, str]]],
     cache_dir: Optional[str] = None,
-    build_single_args={},
-    classify_single_args={},
-    build_integrated_args={},
-    classify_integrated_args={},
-    num_threads=1,
+    build_single_args: dict = {},
+    classify_single_args: dict = {},
+    build_integrated_args: dict = {},
+    classify_integrated_args: dict = {},
+    num_threads: int = 1,
 ) -> Tuple[list[BiocFrame], BiocFrame]:
     """Annotate a single-cell expression dataset based on the correlation 
     of each cell to profiles in multiple labelled references, where the
@@ -31,10 +31,10 @@ def annotate_integrated(
             features and columns are samples (usually cells). Entries should be expression
             values; only the ranking within each column will be used.
 
-        test_features (Sequence): Sequence of length equal to the number of rows in
+        test_features: Sequence of length equal to the number of rows in
             ``test_data``, containing the feature identifier for each row.
 
-        ref_data: 
+        ref_data:
             Sequence consisting of one or more of the following:
 
             - A matrix-like object representing the reference dataset, where rows
@@ -45,7 +45,7 @@ def annotate_integrated(
               :py:meth:`~singler.fetch_reference.fetch_github_reference`.
               This will use the specified dataset as the reference.
 
-        ref_labels (Union[str, Sequence[Union[Sequence, str]]]):
+        ref_labels:
             Sequence of the same length as ``ref_data``, where the contents
             depend on the type of value in the corresponding entry of ``ref_data``:
 
@@ -54,10 +54,9 @@ def annotate_integrated(
               containing the label associated with each column.
             - If ``ref_data[i]`` is a string, ``ref_labels[i]`` should be a string
               specifying the label type to use, e.g., "main", "fine", "ont".
+              If a single string is supplied, it is recycled for all ``ref_data``.
 
-             If a single string is supplied, it is recycled for all ``ref_data``.
-
-        ref_features (Union[str, Sequence[Union[Sequence, str]]]):
+        ref_features:
             Sequence of the same length as ``ref_data``, where the contents
             depend on the type of value in the corresponding entry of ``ref_data``:
 
@@ -66,37 +65,35 @@ def annotate_integrated(
               containing the feature identifier associated with each row.
             - If ``ref_data[i]`` is a string, ``ref_features[i]`` should be a string
               specifying the feature type to use, e.g., "ensembl", "symbol".
+              If a single string is supplied, it is recycled for all ``ref_data``.
 
-             If a single string is supplied, it is recycled for all ``ref_data``.
-
-        cache_dir (str):
+        cache_dir:
             Path to a cache directory for downloading reference files, see
             :py:meth:`~singler.fetch_reference.fetch_github_reference` for details.
             Only used if ``ref_data`` is a string.
 
-        build_single_args (dict):
+        build_single_args:
             Further arguments to pass to
             :py:meth:`~singler.build_single_reference.build_single_reference`.
 
-        classify_single_args (dict):
+        classify_single_args:
             Further arguments to pass to
             :py:meth:`~singler.classify_single_reference.classify_single_reference`.
 
-        build_integrated_args (dict):
+        build_integrated_args:
             Further arguments to pass to
-            :py:meth:`~singler.build_integrated_reference.build_integrated_reference`.
+            :py:meth:`~singler.build_integrated_references.build_integrated_references`.
 
-        classify_integrated_args (dict):
+        classify_integrated_args:
             Further arguments to pass to
-            :py:meth:`~singler.classify_integrated_reference.classify_integrated_reference`.
+            :py:meth:`~singler.classify_integrated_references.classify_integrated_references`.
 
-        num_threads (int):
+        num_threads:
             Number of threads to use for the various steps.
 
     Returns:
-        Tuple[list[BiocFrame], BiocFrame]: Tuple where the first element
-        contains per-reference results (i.e. a list of BiocFrame outputs
-        equivalent to running
+        Tuple where the first element contains per-reference results (i.e., a
+        list of BiocFrame outputs equivalent to running
         :py:meth:`~singler.annotate_single.annotate_single` on each reference)
         and the second element contains integrated results across references
         (i.e., a BiocFrame from

diff --git a/src/singler/annotate_single.py b/src/singler/annotate_single.py
@@ -1,4 +1,4 @@
-from typing import Union, Sequence, Optional
+from typing import Union, Sequence, Optional, Any
 from biocframe import BiocFrame
 
 from .fetch_reference import fetch_github_reference, realize_github_markers
@@ -49,15 +49,15 @@ def _build_reference(ref_data, ref_labels, ref_features, test_features_set, cach
 
 
 def annotate_single(
-    test_data,
+    test_data: Any,
     test_features: Sequence,
-    ref_data,
+    ref_data: Any,
     ref_labels: Union[Sequence, str],
     ref_features: Union[Sequence, str],
     cache_dir: Optional[str] = None,
-    build_args={},
-    classify_args={},
-    num_threads=1,
+    build_args: dict = {},
+    classify_args: dict = {},
+    num_threads: int = 1,
 ) -> BiocFrame:
     """Annotate a single-cell expression dataset based on the correlation 
     of each cell to profiles in a labelled reference.
@@ -67,7 +67,7 @@ def annotate_single(
             features and columns are samples (usually cells). Entries should be expression
             values; only the ranking within each column will be used.
 
-        test_features (Sequence): Sequence of length equal to the number of rows in
+        test_features: Sequence of length equal to the number of rows in
             ``test_data``, containing the feature identifier for each row.
 
         ref_data: A matrix-like object representing the reference dataset, where rows
@@ -79,40 +79,40 @@ def annotate_single(
             :py:meth:`~singler.fetch_reference.fetch_github_reference`.
             This will use the specified dataset as the reference.
 
-        ref_labels (Union[Sequence, str]):
+        ref_labels:
             If ``ref_data`` is a matrix-like object, ``ref_labels`` should be
             a sequence of length equal to the number of columns of ``ref_data``,
             containing the label associated with each column.
 
             If ``ref_data`` is a string, ``ref_labels`` should be a string
             specifying the label type to use, e.g., "main", "fine", "ont".
 
-        ref_features (Union[Sequence, str]):
+        ref_features:
             If ``ref_data`` is a matrix-like object, ``ref_features`` should be
             a sequence of length equal to the number of rows of ``ref_data``,
             containing the feature identifier associated with each row.
 
             If ``ref_data`` is a string, ``ref_features`` should be a string
             specifying the feature type to use, e.g., "ensembl", "symbol".
 
-        cache_dir (str):
+        cache_dir:
             Path to a cache directory for downloading reference files, see
             :py:meth:`~singler.fetch_reference.fetch_github_reference` for details.
             Only used if ``ref_data`` is a string.
 
-        build_args (dict):
+        build_args:
             Further arguments to pass to
             :py:meth:`~singler.build_single_reference.build_single_reference`.
 
-        classify_args (dict):
+        classify_args:
             Further arguments to pass to
             :py:meth:`~singler.classify_single_reference.classify_single_reference`.
 
-        num_threads (int):
+        num_threads:
             Number of threads to use for the various steps.
 
     Returns:
-        BiocFrame: A data frame containing the labelling results, see
+        A data frame containing the labelling results, see
         :py:meth:`~singler.classify_single_reference.classify_single_reference`
         for details. The metadata also contains a ``markers`` dictionary,
         specifying the markers that were used for each pairwise comparison

diff --git a/src/singler/build_integrated_references.py b/src/singler/build_integrated_references.py
@@ -30,7 +30,8 @@ def reference_names(self) -> Union[Sequence[str], None]:
     def reference_labels(self) -> list:
         """List of lists containing the names of the labels for each reference.
 
-        Each entry corresponds to a reference in :py:attr:`~names`, if not None.
+        Each entry corresponds to a reference in :py:attr:`~reference_names`, 
+        if ``reference_names`` is not None.
         """
         return self._labels
 
@@ -47,32 +48,34 @@ def build_integrated_references(
     ref_features_list: list[Sequence],
     ref_prebuilt_list: list[SinglePrebuiltReference],
     ref_names: Optional[Sequence[str]] = None,
-    num_threads=1,
+    num_threads: int = 1,
 ) -> IntegratedReferences:
     """Build a set of integrated references for classification of a test dataset.
 
     Arguments:
-        test_features (Sequence): Sequence of features for the test dataset.
+        test_features: Sequence of features for the test dataset.
 
-        ref_data_list (list): List of reference datasets, equivalent to ``ref_data`` in
+        ref_data_list: List of reference datasets, where each entry is equivalent to ``ref_data`` in
             :py:meth:`~singler.build_single_reference.build_single_reference`.
 
-        ref_labels_list (list[Sequence]): List of reference labels, equivalent to ``ref_labels`` in
+        ref_labels_list: List of reference labels, where each entry is equivalent to ``ref_labels`` in
             :py:meth:`~singler.build_single_reference.build_single_reference`.
 
-        ref_features_list (list[Sequence]): List of reference features, equivalent to ``ref_features`` in
+        ref_features_list: List of reference features, where each entry is equivalent to ``ref_features`` in
             :py:meth:`~singler.build_single_reference.build_single_reference`.
 
-        ref_prebuilt_list (list[SinglePrebuiltReference]): List of prebuilt references.
+        ref_prebuilt_list: List of prebuilt references, typically created by 
+            calling :py:meth:`~singler.build_single_reference.build_single_reference` on the corresponding
+            elements of ``ref_data_list``, ``ref_labels_list`` and ``ref_features_list``.
 
-        ref_names (Sequence[str], optional): Sequence of names for the references.
+        ref_names: Sequence of names for the references.
             If None, these are automatically generated.
 
-        num_threads (int):
+        num_threads:
             Number of threads.
 
     Returns:
-        IntegratedReferences: Integrated references for classification with
+        Integrated references for classification with
         :py:meth:`~singler.classify_integrated_references.classify_integrated_references`.
     """
     universe = _stable_union(test_features, *ref_features_list)