Merge branch 'main' into master

scverse · Oct 31, 2024 · ee590b7 · ee590b7
2 parents 014be18 + 0135489
commit ee590b7
Show file tree

Hide file tree

Showing 14 changed files with 293 additions and 152 deletions.
diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
@@ -2,9 +2,9 @@ name: Python package
 
 on:
   push:
-    branches: [master]
+    branches: [main]
   pull_request:
-    branches: [master]
+    branches: [main]
   schedule:
     - cron: "0 5 1,15 * *"
 
@@ -15,7 +15,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8, 3.12]
+        python-version: ["3.10", "3.12"]
 
     steps:
     - uses: actions/checkout@v4

diff --git a/.gitignore b/.gitignore
@@ -6,6 +6,9 @@ __pycache__/
 # C extensions
 *.so
 
+# cached data
+data/
+
 # Distribution / packaging
 .Python
 build/

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
@@ -10,11 +10,26 @@ Release notes
    *
 
 
+v0.1.7
+------
+
+* Enable compatibility of in-place filtering with the latest anndata releases
+
+* Fix :func:`muon.pp.tfidf` when using data from a layer
+
+* Fix custom chromosome names in :func:`muon.atac.tl.count_fragments_features`
+
+* Prepare to count unique fragments in :func:`muon.atac.tl.count_fragments_features` from the next version
+
+* Improve :func:`muon.pl.scatter`
+
 v0.1.6
 ------
 
 * Compatibility with scanpy 1.10
 
+* Extend ``_l2norm`` to sparse inputs.
+
 v0.1.5
 ------
 

diff --git a/muon/__init__.py b/muon/__init__.py
@@ -13,4 +13,4 @@
 from . import atac
 from . import prot
 
-__version__ = "0.1.6"
+__version__ = "0.1.7"
diff --git a/muon/_atac/preproc.py b/muon/_atac/preproc.py
@@ -96,14 +96,14 @@ def tfidf(
         tf = np.dot(n_peaks, counts)
     else:
         n_peaks = np.asarray(counts.sum(axis=1)).reshape(-1, 1)
-        tf = adata.X / n_peaks
+        tf = counts / n_peaks
 
     if scale_factor is not None and scale_factor != 0 and scale_factor != 1:
         tf = tf * scale_factor
     if log_tf:
         tf = np.log1p(tf)
 
-    idf = np.asarray(adata.shape[0] / adata.X.sum(axis=0)).reshape(-1)
+    idf = np.asarray(adata.shape[0] / counts.sum(axis=0)).reshape(-1)
     if log_idf:
         idf = np.log1p(idf)
 
@@ -116,7 +116,7 @@ def tfidf(
     if log_tfidf:
         tf_idf = np.log1p(tf_idf)
 
-    res = np.nan_to_num(tf_idf, 0)
+    res = np.nan_to_num(tf_idf, nan=0.0)
     if not inplace:
         return res
 

diff --git a/muon/_atac/tools.py b/muon/_atac/tools.py
@@ -751,6 +751,7 @@ def count_fragments_features(
     stranded: bool = False,
     extend_upstream: int = 2e3,
     extend_downstream: int = 0,
+    count_reads: bool = True,
 ) -> AnnData:
     """
     Count fragments overlapping given Features. Returns cells x features matrix.
@@ -772,6 +773,12 @@ def count_fragments_features(
                 Number of nucleotides to extend every gene upstream (2000 by default to extend gene coordinates to promoter regions)
         extend_downstream
                 Number of nucleotides to extend every gene downstream (0 by default)
+        count_reads: bool (True by default)
+                NOTE: default will be changed to False from v0.2.
+                Whether to count reads instead of fragments.
+                If True, the number of reads (read support) per fragment will be used.
+                This will also include duplicate read pairs.
+                If False, `1` will be added for each fragment.
     """
     if isinstance(data, AnnData):
         adata = data
@@ -805,11 +812,18 @@ def count_fragments_features(
             "pysam is not available. It is required to work with the fragments file. Install pysam from PyPI (`pip install pysam`) or from GitHub (`pip install git+https://github.com/pysam-developers/pysam`)"
         )
 
+    if count_reads:
+        warn(
+            f"From v0.2, by default, unique fragments will be counted instead of reads. See muon#110 for details.",
+            FutureWarning,
+            stacklevel=2,
+        )
+
     n = adata.n_obs
     n_features = features.shape[0]
 
     # TODO: refactor and reuse this code
-    # TODO: write tests (see #59, #68)
+    # TODO: write tests (see #59, #68, #110)
 
     f_cols = np.array([col.lower() for col in features.columns.values])
     for col in ("start", "end"):
@@ -853,11 +867,16 @@ def count_fragments_features(
                 f_from = f[start_col] - extend_upstream
                 f_to = f[end_col] + extend_downstream
 
+
             for fr in fragments.fetch(f[chr_col], f_from, f_to, parser=pysam.asBed()):
                 try:
                     ind = adata.obs.index.get_loc(fr.name)  # cell barcode (e.g. GTCAGTCAGTCAGTCA-1)
                     mx.rows[i].append(ind)
-                    mx.data[i].append(int(fr.score))  # number of cuts per fragment (e.g. 2)
+                    if count_reads:
+                        # number of read pairs associated with the fragment
+                        mx.data[i].append(int(fr.score))
+                    else:
+                        mx.data[i].append(1)
                 except:
                     pass
         # The connection has to be closed

diff --git a/muon/_core/plot.py b/muon/_core/plot.py
@@ -1,4 +1,4 @@
-from typing import Union, List, Optional, Iterable, Sequence, Dict
+from typing import Dict, Iterable, List, Optional, Sequence, Union
 import warnings
 
 from matplotlib.axes import Axes
@@ -43,7 +43,7 @@ def scatter(
     y : Optional[str]
         y coordinate
     color : Optional[Union[str, Sequence[str]]], optional (default: None)
-        Keys for variables or annotations of observations (.obs columns),
+        Keys or a single key for variables or annotations of observations (.obs columns),
         or a hex colour specification.
     use_raw : Optional[bool], optional (default: None)
         Use `.raw` attribute of the modality where a feature (from `color`) is derived from.
@@ -53,9 +53,7 @@ def scatter(
         No layer is used by default. A single layer value will be expanded to [layer, layer, layer].
     """
     if isinstance(data, AnnData):
-        return sc.pl.embedding(
-            data, x=x, y=y, color=color, use_raw=use_raw, layers=layers, **kwargs
-        )
+        return sc.pl.scatter(data, x=x, y=y, color=color, use_raw=use_raw, layers=layers, **kwargs)
 
     if isinstance(layers, str) or layers is None:
         layers = [layers, layers, layers]
@@ -72,10 +70,9 @@ def scatter(
         if isinstance(color, str):
             color_obs = _get_values(data, color, use_raw=use_raw, layer=layers[2])
             color_obs = pd.DataFrame({color: color_obs})
-            color = [color]
         else:
-            # scanpy#311 / scanpy#1497 has to be fixed for this to work
             color_obs = _get_values(data, color, use_raw=use_raw, layer=layers[2])
+
         color_obs.index = data.obs_names
         obs = pd.concat([obs, color_obs], axis=1, ignore_index=False)
 
@@ -86,11 +83,10 @@ def scatter(
     # and are now stored in .obs
     retval = sc.pl.scatter(ad, x=x, y=y, color=color, **kwargs)
     if color is not None:
-        for col in color:
-            try:
-                data.uns[f"{col}_colors"] = ad.uns[f"{col}_colors"]
-            except KeyError:
-                pass
+        try:
+            data.uns[f"{color}_colors"] = ad.uns[f"{color}_colors"]
+        except KeyError:
+            pass
     return retval