Skip to content

Commit

Permalink
merge master to head
Browse files Browse the repository at this point in the history
  • Loading branch information
Sichao25 committed May 4, 2023
2 parents 15d896d + 9a079f5 commit a1effbc
Show file tree
Hide file tree
Showing 61 changed files with 8,699 additions and 6,643 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

Inclusive model of expression dynamics with metabolic labeling based scRNA-seq / multiomics, vector field reconstruction, potential landscape mapping, differential geometry analyses, and most probably paths / *in silico* perturbation predictions.

[Installation](https://dynamo-release.readthedocs.io/en/latest/ten_minutes_to_dynamo.html#how-to-install) - [Ten minutes to dynamo](https://dynamo-release.readthedocs.io/en/latest/ten_minutes_to_dynamo.html) - [Tutorials](https://dynamo-release.readthedocs.io/en/latest/notebooks/Differential_geometry.html) - [API](https://dynamo-release.readthedocs.io/en/latest/API.html) - [Citation](https://www.biorxiv.org/content/10.1101/696724v2) - [Theory](https://dynamo-release.readthedocs.io/en/latest/notebooks/Primer.html)
[Installation](https://dynamo-release.readthedocs.io/en/latest/ten_minutes_to_dynamo.html#how-to-install) - [Ten minutes to dynamo](https://dynamo-release.readthedocs.io/en/latest/ten_minutes_to_dynamo.html) - [Tutorials](https://dynamo-release.readthedocs.io/en/latest/notebooks/Differential_geometry.html) - [API](https://dynamo-release.readthedocs.io/en/latest/API.html) - [Citation](https://www.sciencedirect.com/science/article/pii/S0092867421015774?via%3Dihub) - [Theory](https://dynamo-release.readthedocs.io/en/latest/notebooks/Primer.html)

![Dynamo](https://user-images.githubusercontent.com/7456281/152110270-7ee1b0ed-1205-495d-9d65-59c7984d2fa2.png)

Expand Down
1 change: 1 addition & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
$PYTHON setup.py install # Python command to install the script.
2 changes: 2 additions & 0 deletions conda_recipe/bld.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
"%PYTHON%" setup.py install
if errorlevel 1 exit 1
1 change: 1 addition & 0 deletions conda_recipe/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
$PYTHON setup.py install # Python command to install the script.
57 changes: 57 additions & 0 deletions conda_recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package:
name: "dynamo-release"
version: "1.2.0"

source:
git_rev: v1.2.0
git_url: https://github.com/aristoteleo/dynamo-release.git

build:
preserve_egg_dir: True

requirements:
host:
- python
- typing-extensions
- openpyxl
- get_version
- numpy 1.23.*
- markdown 3.3.*
build:
- python
runs:
- python
- setuptools
- numpy 1.23.*
- pandas>=1.3.5
- scipy>=1.4.1
- scikit-learn>=0.19.1
- anndata>=0.8.0
- matplotlib>=3.5.3
- nxviz==0.7.3
- get_version>=3.5.4
- PATSY
- statsmodels
- numba>=0.54.0
- seaborn>=0.9.0
- colorcet>=2.0.1
- tqdm
- python-igraph>=0.7.1
- cdlib>=0.2.6
- louvain==0.8.0
- pynndescent>=0.5.2
- pre-commit
- networkx
- typing-extensions
- openpyxl

about:
home: https://github.com/aristoteleo/dynamo-release
license: MIT
license_familY: MIT
license_file: LICENSE
summary: "Inclusive model of expression dynamics with metabolic labeling based scRNA-seq / multiomics, vector field reconstruction, potential landscape mapping, differential geometry analyses, and most probably paths / in silico perturbation predictions."

extra:
recipe-maintainers:
- Xiaojieqiu
73 changes: 67 additions & 6 deletions dynamo/configuration.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import warnings
from typing import List, Optional, Tuple, Union

import colorcet
import matplotlib
Expand All @@ -9,7 +10,7 @@
from cycler import cycler
from matplotlib import cm, colors, rcParams

from .dynamo_logger import main_info, main_warning
from .dynamo_logger import main_debug, main_info


class DynamoAdataKeyManager:
Expand Down Expand Up @@ -62,16 +63,19 @@ def gen_layer_pearson_residual_key(layer: str):
)

def select_layer_data(adata: AnnData, layer: str, copy=False) -> pd.DataFrame:
"""select layer data based on layer key. The default layer is X layer in adata.
For layer-like data such as X stored in adata.X (but not in adata.layers) and protein data specified by dynamo convention,
this utility provides an unified interface for selecting layer data with shape n_obs x n_var."""
"""This utility provides a unified interface for selecting layer data.
The default layer is X layer in adata with shape n_obs x n_var. For protein data it selects adata.obsm["protein"]
as specified by dynamo convention (the number of proteins are generally less than detected genes `n_var`).
For other layer data, select data based on layer key with shape n_obs x n_var.
"""
if layer is None:
layer = DynamoAdataKeyManager.X_LAYER
res_data = None
if layer == DynamoAdataKeyManager.X_LAYER:
res_data = adata.X
elif layer == DynamoAdataKeyManager.PROTEIN_LAYER:
res_data = adata.obsm["protein"]
res_data = adata.obsm["protein"] if "protein" in adata.obsm_keys() else None
else:
res_data = adata.layers[layer]
if copy:
Expand Down Expand Up @@ -102,6 +106,8 @@ def check_if_layer_exist(adata: AnnData, layer: str) -> bool:
def get_available_layer_keys(adata, layers="all", remove_pp_layers=True, include_protein=True):
"""Get the list of available layers' keys. If `layers` is set to all, return a list of all available layers; if `layers` is set to a list, then the intersetion of available layers and `layers` will be returned."""
layer_keys = list(adata.layers.keys())
if layers is None: # layers=adata.uns["pp"]["experiment_layers"], in calc_sz_factor
layers = "X"
if remove_pp_layers:
layer_keys = [i for i in layer_keys if not i.startswith("X_")]

Expand Down Expand Up @@ -137,6 +143,61 @@ def allowed_X_layer_names():
def init_uns_pp_namespace(adata: AnnData):
adata.uns[DynamoAdataKeyManager.UNS_PP_KEY] = {}

def get_excluded_layers(
X_total_layers: bool = False,
splicing_total_layers: bool = False
) -> List:
"""Get a list of excluded layers based on the provided arguments.
When splicing_total_layers is False, the function normalize spliced and unspliced RNA separately using each
layer's size factors. When X_total_layers is False, the function normalize X (normally it corresponds to the
spliced RNA or total RNA for a conventional scRNA-seq or labeling scRNA-seq) using its own size factor.
Args:
X_total_layers: whether to also normalize adata.X by size factor from total RNA.
splicing_total_layers: whether to also normalize spliced / unspliced layers by size factor from total RNA.
Returns:
The list of layers to be excluded.
"""
excluded_layers = []
if not X_total_layers:
excluded_layers.extend(["X"])
if not splicing_total_layers:
excluded_layers.extend(["spliced", "unspliced"])
return excluded_layers

def aggregate_layers_into_total(
_adata: AnnData,
layers: Union[str, List[str]] = "all",
total_layers: Optional[List[str]] = None,
extend_layers: bool = True,
) -> Tuple[Optional[List[str]], Union[str, List[str]]]:
"""Create a total layer in adata by aggregating multiple layers.
The size factor normalization function is able to calculate size factors from customized layers. Given list
of total_layers, this helper function will calculate a temporary `_total_` layer.
Args:
_adata: the Anndata object.
layers: the layer(s) to be normailized in the normailzation function.
total_layers: the layer(s) to sum up to get the total mRNA. For example, ["spliced", "unspliced"],
["uu", "ul", "su", "sl"] or ["new", "old"], etc.
extend_layers: whether to extend the `_total_` layer to the list of layers.
Returns:
The tuple contains total layers and layers. Anndata object will be updated with `_total_` layer.
"""
if not isinstance(total_layers, list):
total_layers = [total_layers]
if len(set(total_layers).difference(_adata.layers.keys())) == 0:
total = None
for t_key in total_layers:
total = _adata.layers[t_key] if total is None else total + _adata.layers[t_key]
_adata.layers["_total_"] = total
if extend_layers:
layers.extend(["_total_"])
return total_layers, layers

# TODO discuss alias naming convention
DKM = DynamoAdataKeyManager
Expand Down Expand Up @@ -786,5 +847,5 @@ def set_pub_style_mpltex():

# initialize DynamoSaveConfig and DynamoVisConfig mode defaults
DynamoAdataConfig.update_data_store_mode("full")
main_info("setting visualization default mode in dynamo. Your customized matplotlib settings might be overritten.")
main_debug("setting visualization default mode in dynamo. Your customized matplotlib settings might be overwritten.")
DynamoVisConfig.set_default_mode()
11 changes: 5 additions & 6 deletions dynamo/dynamo_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ def error(self, message, indent_level=1, *args, **kwargs):

def info_insert_adata(self, key, adata_attr="obsm", indent_level=1, *args, **kwargs):
message = "<insert> %s to %s in AnnData Object." % (key, adata_attr)
message = format_logging_message(message, logging.INFO, indent_level=indent_level)
return self.logger.error(message, *args, **kwargs)
message = format_logging_message(message, logging.DEBUG, indent_level=indent_level)
return self.logger.debug(message, *args, **kwargs)

def info_insert_adata_var(self, key, indent_level=1, *args, **kwargs):
return self.info_insert_adata(self, key, adata_attr="var", indent_level=1, *args, **kwargs)
Expand Down Expand Up @@ -189,18 +189,17 @@ def report_progress(self, percent=None, count=None, total=None, progress_name=""

def finish_progress(self, progress_name="", time_unit="s", indent_level=1):
self.log_time()
self.report_progress(percent=100, progress_name=progress_name)
# self.report_progress(percent=100, progress_name=progress_name)

saved_terminator = self.logger_stream_handler.terminator
self.logger_stream_handler.terminator = ""
self.logger.info("\n")
self.logger_stream_handler.flush()
self.logger_stream_handler.terminator = saved_terminator

if time_unit == "s":
self.info("[%s] finished [%.4fs]" % (progress_name, self.time_passed), indent_level=indent_level)
self.info("[%s] completed [%.4fs]" % (progress_name, self.time_passed), indent_level=indent_level)
elif time_unit == "ms":
self.info("[%s] finished [%.4fms]" % (progress_name, self.time_passed * 1e3), indent_level=indent_level)
self.info("[%s] completed [%.4fms]" % (progress_name, self.time_passed * 1e3), indent_level=indent_level)
else:
raise NotImplementedError
# self.logger.info("|")
Expand Down
63 changes: 17 additions & 46 deletions dynamo/plot/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from ..configuration import DynamoAdataKeyManager
from ..dynamo_logger import main_warning
from ..preprocessing import preprocess as pp
from ..preprocessing.preprocess_monocle_utils import top_table
from ..preprocessing.gene_selection import get_prediction_by_svr
from ..preprocessing.utils import detect_experiment_datatype
from ..tools.utils import get_mapper, update_dict
from .utils import save_fig
Expand Down Expand Up @@ -649,47 +649,36 @@ def feature_genes(
save_show_or_return: str = "show",
save_kwargs: dict = {},
):
"""Plot selected feature genes on top of the mean vs. dispersion scatterplot.
"""Plot selected feature genes on top of the mean vs. dispersion scatter plot.
Parameters
----------
Args:
adata: :class:`~anndata.AnnData`
AnnData object
layer: `str` (default: `X`)
The data from a particular layer (include X) used for making the feature gene plot.
mode: None or `str` (default: `None`)
The method to select the feature genes (can be either `dispersion`, `gini` or `SVR`).
The method to select the feature genes (can be either `cv_dispersion`, `fano_dispersion` or `gini`).
figsize: `string` (default: (4, 3))
Figure size of each facet.
save_show_or_return: {'show', 'save', 'return'} (default: `show`)
Whether to save, show or return the figure.
save_kwargs: `dict` (default: `{}`)
A dictionary that will passed to the save_fig function. By default it is an empty dictionary and the
A dictionary that will be passed to the save_fig function. By default, it is an empty dictionary and the
save_fig function will use the {"path": None, "prefix": 'feature_genes', "dpi": None, "ext": 'pdf',
"transparent": True, "close": True, "verbose": True} as its parameters. Otherwise you can provide a
"transparent": True, "close": True, "verbose": True} as its parameters. Otherwise, you can provide a
dictionary that properly modify those keys according to your needs.
Returns
-------
Returns:
Nothing but plots the selected feature genes via the mean, CV plot.
"""

import matplotlib.pyplot as plt

mode = adata.uns["feature_selection"] if mode is None else mode

layer = DynamoAdataKeyManager.get_available_layer_keys(adata, layer, include_protein=False)[0]

uns_store_key = None
if mode == "dispersion":
uns_store_key = "dispFitInfo" if layer in ["raw", "X"] else layer + "_dispFitInfo"

table = top_table(adata, layer)
x_min, x_max = (
np.nanmin(table["mean_expression"]),
np.nanmax(table["mean_expression"]),
)
elif mode == "SVR":
if "_dispersion" in mode: # "cv_dispersion", "fano_dispersion"
prefix = "" if layer == "X" else layer + "_"
uns_store_key = "velocyto_SVR" if layer == "raw" or layer == "X" else layer + "_velocyto_SVR"

Expand All @@ -709,11 +698,12 @@ def feature_genes(
ordering_genes = adata.var["use_for_pca"] if "use_for_pca" in adata.var.columns else None

mu_linspace = np.linspace(x_min, x_max, num=1000)
fit = (
adata.uns[uns_store_key]["disp_func"](mu_linspace)
if mode == "dispersion"
else adata.uns[uns_store_key]["SVR"](mu_linspace.reshape(-1, 1))
)
if "_dispersion" in mode:
mean = adata.uns[uns_store_key]["mean"]
cv = adata.uns[uns_store_key]["cv"]
svr_gamma = adata.uns[uns_store_key]["svr_gamma"]
fit, _ = get_prediction_by_svr(mean, cv, svr_gamma)
fit = fit(mu_linspace.reshape(-1, 1))

plt.figure(figsize=figsize)
plt.plot(mu_linspace, fit, alpha=0.4, color="r")
Expand All @@ -724,15 +714,7 @@ def feature_genes(
)

valid_disp_table = table.iloc[valid_ind, :]
if mode == "dispersion":
ax = plt.scatter(
valid_disp_table["mean_expression"],
valid_disp_table["dispersion_empirical"],
s=3,
alpha=1,
color="xkcd:red",
)
elif mode == "SVR":
if "_dispersion" in mode:
ax = plt.scatter(
valid_disp_table[prefix + "log_m"],
valid_disp_table[prefix + "log_cv"],
Expand All @@ -743,15 +725,7 @@ def feature_genes(

neg_disp_table = table.iloc[~valid_ind, :]

if mode == "dispersion":
ax = plt.scatter(
neg_disp_table["mean_expression"],
neg_disp_table["dispersion_empirical"],
s=3,
alpha=0.5,
color="xkcd:grey",
)
elif mode == "SVR":
if "_dispersion" in mode:
ax = plt.scatter(
neg_disp_table[prefix + "log_m"],
neg_disp_table[prefix + "log_cv"],
Expand All @@ -760,9 +734,6 @@ def feature_genes(
color="xkcd:grey",
)

# plt.xlim((0, 100))
if mode == "dispersion":
plt.xscale("log")
plt.yscale("log")
plt.xlabel("Mean (log)")
plt.ylabel("Dispersion (log)") if mode == "dispersion" else plt.ylabel("CV (log)")
Expand Down Expand Up @@ -1080,7 +1051,7 @@ def highest_frac_genes(

else:
main_warning(
"%s not in adata.var, ignoring the gene annotation key when plotting",
"%s not in adata.var, ignoring the gene annotation key when plotting" % gene_annotation_key,
indent_level=2,
)

Expand Down
4 changes: 1 addition & 3 deletions dynamo/prediction/perturbation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,10 @@
from ..vectorfield.scVectorField import KOVectorField, vector_field_function_knockout
from ..vectorfield.vector_calculus import (
jacobian,
rank_cell_groups,
rank_cells,
rank_genes,
vecfld_from_adata,
vector_transformation,
)
from ..vectorfield.rank_vf import rank_cell_groups, rank_cells, rank_genes
from .utils import expr_to_pca, pca_to_expr, z_score, z_score_inv


Expand Down
Loading

0 comments on commit a1effbc

Please sign in to comment.