From a66f2351c28477ae6d0454600de127e9c0e6475f Mon Sep 17 00:00:00 2001 From: sichao Date: Tue, 20 Feb 2024 12:07:34 -0500 Subject: [PATCH 1/2] update docstring and typing for root level scripts --- dynamo/configuration.py | 182 +++++++++++++++++------------------ dynamo/data_io.py | 87 ++++++++--------- dynamo/dynamo_logger.py | 206 +++++++++++++++++++++++++--------------- dynamo/get_version.py | 39 +++++--- dynamo/sample_data.py | 169 +++++++++++++------------------- dynamo/utils.py | 88 ++++++++--------- 6 files changed, 392 insertions(+), 379 deletions(-) diff --git a/dynamo/configuration.py b/dynamo/configuration.py index 4a48ed33f..d05c0fc20 100755 --- a/dynamo/configuration.py +++ b/dynamo/configuration.py @@ -1,5 +1,5 @@ import warnings -from typing import List, Optional, Tuple, Union +from typing import Any, List, Optional, Tuple, Union import colorcet import matplotlib @@ -14,6 +14,7 @@ class DynamoAdataKeyManager: + """A class to manage the keys used in anndata object for dynamo.""" VAR_GENE_MEAN_KEY = "pp_gene_mean" VAR_GENE_VAR_KEY = "pp_gene_variance" VAR_GENE_HIGHLY_VARIABLE_KEY = "gene_highly_variable" @@ -38,8 +39,9 @@ class DynamoAdataKeyManager: PROTEIN_LAYER = "protein" X_PCA = "X_pca" - def gen_new_layer_key(layer_name, key, sep="_") -> str: - """utility function for returning a new key name for a specific layer. By convention layer_name should not have the separator as the last character.""" + def gen_new_layer_key(layer_name: str, key: str, sep: str = "_") -> str: + """Utility function for returning a new key name for a specific layer. By convention layer_name should not have + the separator as the last character.""" if layer_name == "": return key if layer_name[-1] == sep: @@ -50,14 +52,15 @@ def gen_layer_pp_key(*keys): """Generate dynamo style keys for adata.uns[pp][key0_key1_key2...]""" return "_".join(keys) - def gen_layer_X_key(key): + def gen_layer_X_key(key: str) -> str: """Generate dynamo style keys for adata.layer[X_*], used later in dynamics""" return DynamoAdataKeyManager.gen_new_layer_key("X", key) - def is_layer_X_key(key): + def is_layer_X_key(key: str) -> bool: + """Check if the key is a layer key for X layer.""" return key[:2] == "X_" - def gen_layer_pearson_residual_key(layer: str): + def gen_layer_pearson_residual_key(layer: str) -> str: """Generate dynamo style keys for adata.uns[pp][key0_key1_key2...]""" return DynamoAdataKeyManager.gen_layer_pp_key( layer, DynamoAdataKeyManager.UNS_PP_PEARSON_RESIDUAL_NORMALIZATION @@ -83,7 +86,8 @@ def select_layer_data(adata: AnnData, layer: str, copy=False) -> pd.DataFrame: return res_data.copy() return res_data - def set_layer_data(adata: AnnData, layer: str, vals: np.array, var_indices: np.array = None): + def set_layer_data(adata: AnnData, layer: str, vals: np.array, var_indices: np.array = None) -> None: + """This utility provides a unified interface for setting data to layers.""" if var_indices is None: var_indices = slice(None) if layer == DynamoAdataKeyManager.X_LAYER: @@ -96,6 +100,7 @@ def set_layer_data(adata: AnnData, layer: str, vals: np.array, var_indices: np.a adata.layers[layer] = vals def check_if_layer_exist(adata: AnnData, layer: str) -> bool: + """Check if the layer exists in adata.""" if layer == DynamoAdataKeyManager.X_LAYER: # assume always exist return True @@ -104,8 +109,9 @@ def check_if_layer_exist(adata: AnnData, layer: str) -> bool: return layer in adata.layers - def get_available_layer_keys(adata, layers="all", remove_pp_layers=True, include_protein=True): - """Get the list of 
available layers' keys. If `layers` is set to all, return a list of all available layers; if `layers` is set to a list, then the intersetion of available layers and `layers` will be returned.""" + def get_available_layer_keys(adata, layers="all", remove_pp_layers=True, include_protein=True) -> List[str]: + """Get the list of available layers' keys. If `layers` is set to all, return a list of all available layers; if + `layers` is set to a list, then the intersetion of available layers and `layers` will be returned.""" layer_keys = list(adata.layers.keys()) if layers is None: # layers=adata.uns["pp"]["experiment_layers"], in calc_sz_factor layers = "X" @@ -120,13 +126,15 @@ def get_available_layer_keys(adata, layers="all", remove_pp_layers=True, include res_layers = list(set(res_layers).difference(["matrix", "ambiguous", "spanning"])) return res_layers - def allowed_layer_raw_names(): + def allowed_layer_raw_names() -> Tuple[List[str], List[str], List[str]]: + """Return a list of allowed layer names in raw data.""" only_splicing = ["spliced", "unspliced"] only_labeling = ["new", "total"] splicing_and_labeling = ["uu", "ul", "su", "sl"] return only_splicing, only_labeling, splicing_and_labeling def get_raw_data_layers(adata: AnnData) -> str: + """Get the list of raw data layers names in adata.""" only_splicing, only_labeling, splicing_and_labeling = DKM.allowed_layer_raw_names() # select layers in adata to be normalized res = only_splicing + only_labeling + splicing_and_labeling @@ -134,14 +142,16 @@ def get_raw_data_layers(adata: AnnData) -> str: res = list(res) return res - def allowed_X_layer_names(): + def allowed_X_layer_names() -> Tuple[List[str], List[str], List[str]]: + """Return a list of allowed layer names in X layers data.""" only_splicing = ["X_spliced", "X_unspliced"] only_labeling = ["X_new", "X_total"] splicing_and_labeling = ["X_uu", "X_ul", "X_su", "X_sl"] return only_splicing, only_labeling, splicing_and_labeling - def init_uns_pp_namespace(adata: AnnData): + def init_uns_pp_namespace(adata: AnnData) -> None: + """Initialize the uns[pp] namespace in adata.""" adata.uns[DynamoAdataKeyManager.UNS_PP_KEY] = {} def get_excluded_layers(X_total_layers: bool = False, splicing_total_layers: bool = False) -> List: @@ -203,12 +213,14 @@ def aggregate_layers_into_total( class DynamoVisConfig: + """Dynamo visualization config class holding static variables to change behaviors of functions globally.""" def set_default_mode(background="white"): + """Set the default mode for dynamo visualization.""" set_figure_params("dynamo", background=background) class DynamoAdataConfig: - """dynamo anndata object config class holding static variables to change behaviors of functions globally.""" + """Dynamo anndata object config class holding static variables to change behaviors of functions globally.""" # set the adata store mode. # saving memory or storing more results @@ -252,22 +264,17 @@ class DynamoAdataConfig: # config_key_to_values contains _key to values for config values config_key_to_values = None - def use_default_var_if_none(val, key, replace_val=None): - """if `val` is equal to `replace_val`, then a config value will be returned according to `key` stored in dynamo configuration. Otherwise return the original `val` value. + def use_default_var_if_none(val: Any, key: str, replace_val: Optional[Any] = None) -> Any: + """If `val` is equal to `replace_val`, then a config value will be returned according to `key` stored in dynamo + configuration. Otherwise return the original `val` value. 
- Parameters - ---------- - val : - The input value to check against. - key : - `key` stored in the dynamo configuration. E.g DynamoAdataConfig.RECIPE_MONOCLE_KEEP_RAW_LAYERS_KEY - replace_val : - the target value to replace, by default None + Args: + val: The input value to check against. + key: `key` stored in the dynamo configuration. E.g DynamoAdataConfig.RECIPE_MONOCLE_KEEP_RAW_LAYERS_KEY. + replace_val: The target value to replace, by default None - Returns - ------- + Returns: `val` or config value set in DynamoAdataConfig according to the method description above. - """ if not key in DynamoAdataConfig.config_key_to_values: assert KeyError("Config %s not exist in DynamoAdataConfig." % (key)) @@ -277,7 +284,8 @@ def use_default_var_if_none(val, key, replace_val=None): return config_val return val - def update_data_store_mode(mode): + def update_data_store_mode(mode: str) -> None: + """Update the data store mode for dynamo anndata object.""" DynamoAdataConfig.data_store_mode = mode # default succinct for recipe*, except for recipe_monocle @@ -311,7 +319,8 @@ def update_data_store_mode(mode): } -def update_data_store_mode(mode): +def update_data_store_mode(mode: str) -> None: + """Update the data store mode for dynamo anndata object.""" DynamoAdataConfig.update_data_store_mode(mode) @@ -495,7 +504,8 @@ def update_data_store_mode(mode): # } -def dyn_theme(background="white"): +def dyn_theme(background: str = "white") -> None: + """Set the dynamo theme for matplotlib.rcParams.""" # https://github.com/matplotlib/matplotlib/blob/master/lib/matplotlib/mpl-data/stylelib/dark_background.mplstyle if background == "black": @@ -539,30 +549,23 @@ def dyn_theme(background="white"): def config_dynamo_rcParams( - background="white", - prop_cycle=zebrafish_256, - fontsize=8, - color_map=None, - frameon=None, -): + background: str = "white", + prop_cycle: List[str] = zebrafish_256, + fontsize: float = 8, + color_map: Optional[str] = None, + frameon: Optional[bool] = None, +) -> None: """Configure matplotlib.rcParams to dynamo defaults (based on ggplot style and scanpy). - Parameters - ---------- - background: `str` (default: `white`) - The background color of the plot. By default we use the white ground - which is suitable for producing figures for publication. Setting it to `black` background will - be great for presentation. - prop_cycle: `list` (default: zebrafish_256) - A list with hex color codes - fontsize: float (default: 6) - Size of font - color_map: `plt.cm` or None (default: None) - Color map - frameon: `bool` or None (default: None) - Whether to have frame for the figure. - Returns - ------- + Args: + background: The background color of the plot. By default we use the white ground which is suitable for producing + figures for publication. Setting it to `black` background will be great for presentation. + prop_cycle: A list with hex color codes + fontsize: Size of font + color_map: Color map + frameon: Whether to have frame for the figure. + + Returns: Nothing but configure the rcParams globally. 
""" @@ -677,50 +680,37 @@ def config_dynamo_rcParams( def set_figure_params( - dynamo=True, - background="white", - fontsize=8, - figsize=(6, 4), - dpi=None, - dpi_save=None, - frameon=None, - vector_friendly=True, - color_map=None, - format="pdf", - transparent=False, - ipython_format="png2x", + dynamo: bool = True, + background: str = "white", + fontsize: float = 8, + figsize: Tuple[float, float] = (6, 4), + dpi: Optional[int] = None, + dpi_save: Optional[int] = None, + frameon: Optional[bool] = None, + vector_friendly: bool = True, + color_map: str = None, + format: str = "pdf", + transparent: bool = False, + ipython_format: str = "png2x", ): """Set resolution/size, styling and format of figures. This function is adapted from: https://github.com/theislab/scanpy/blob/f539870d7484675876281eb1c475595bf4a69bdb/scanpy/_settings.py - Arguments - --------- - dynamo: `bool` (default: `True`) - Init default values for :obj:`matplotlib.rcParams` suited for dynamo. - background: `str` (default: `white`) - The background color of the plot. By default we use the white ground - which is suitable for producing figures for publication. Setting it to `black` background will - be great for presentation. - fontsize: `[float, float]` or None (default: `6`) - figsize: `(float, float)` (default: `(6.5, 5)`) - Width and height for default figure size. - dpi: `int` or None (default: `None`) - Resolution of rendered figures - this influences the size of figures in notebooks. - dpi_save: `int` or None (default: `None`) - Resolution of saved figures. This should typically be higher to achieve - publication quality. - frameon: `bool` or None (default: `None`) - Add frames and axes labels to scatter plots. - vector_friendly: `bool` (default: `True`) - Plot scatter plots using `png` backend even when exporting as `pdf` or `svg`. - color_map: `str` (default: `None`) - Convenience method for setting the default color map. - format: {'png', 'pdf', 'svg', etc.} (default: 'pdf') - This sets the default format for saving figures: `file_format_figs`. - transparent: `bool` (default: `False`) - Save figures with transparent back ground. Sets `rcParams['savefig.transparent']`. - ipython_format : list of `str` (default: 'png2x') - Only concerns the notebook/IPython environment; see - `IPython.core.display.set_matplotlib_formats` for more details. + + Args: + dynamo: Init default values for :obj:`matplotlib.rcParams` suited for dynamo. + background: The background color of the plot. By default we use the white ground which is suitable for producing + figures for publication. Setting it to `black` background will be great for presentation. + fontsize: Size of font. + figsize: Width and height for default figure size. + dpi: Resolution of rendered figures - this influences the size of figures in notebooks. + dpi_save: Resolution of saved figures. This should typically be higher to achieve publication quality. + frameon: Add frames and axes labels to scatter plots. + vector_friendly: Plot scatter plots using `png` backend even when exporting as `pdf` or `svg`. + color_map: Convenience method for setting the default color map. + format: This sets the default format for saving figures: `file_format_figs`. This can be `png`, `pdf`, `svg`, etc. + transparent: Save figures with transparent back ground. Sets `rcParams['savefig.transparent']`. + ipython_format: Only concerns the notebook/IPython environment; see `IPython.core.display.set_matplotlib_formats` + for more details. 
""" try: @@ -762,8 +752,8 @@ def reset_rcParams(): rcParams.update(rcParamsDefault) -def set_pub_style(scaler=1): - """formatting helper function that can be used to save publishable figures""" +def set_pub_style(scaler: float = 1) -> None: + """Formatting helper function that can be used to save publishable figures""" set_figure_params("dynamo", background="white") matplotlib.use("cairo") matplotlib.rcParams.update({"font.size": 4 * scaler}) @@ -781,8 +771,8 @@ def set_pub_style(scaler=1): matplotlib.rcParams.update(params) -def set_pub_style_mpltex(): - """formatting helper function based on mpltex package that can be used to save publishable figures""" +def set_pub_style_mpltex() -> None: + """Formatting helper function based on mpltex package that can be used to save publishable figures""" set_figure_params("dynamo", background="white") matplotlib.use("cairo") # the following code is adapted from https://github.com/liuyxpp/mpltex diff --git a/dynamo/data_io.py b/dynamo/data_io.py index 295a9aa6e..5e00ebad6 100755 --- a/dynamo/data_io.py +++ b/dynamo/data_io.py @@ -2,6 +2,7 @@ import os from functools import reduce +from typing import List, Optional from anndata import ( AnnData, @@ -22,20 +23,15 @@ from .tools.Markov import KernelMarkovChain -def make_dir(path: str, can_exist=True): - """wrapper for making directory +def make_dir(path: str, can_exist=True) -> bool: + """Wrapper for making directory - Parameters - ---------- - path : - str or path object - can_exist : bool, optional - if path can exist or not. If set to True and path exists, an exception will be raised. + Args: + path: A str or path object + can_exist: If path can exist or not. If set to True and path exists, an exception will be raised. - Returns - ------- - bool - if a new directory has been created + Returns: + Boolean value about if a new directory has been created. """ if os.path.exists(path): main_info(path + " : exists") @@ -54,7 +50,7 @@ def make_dir(path: str, can_exist=True): return True -def convert2float(adata, columns, var=False): +def convert2float(adata: AnnData, columns: List, var: bool = False) -> None: """This helper function can convert the category columns (undesiredly converted) when saving adata object into h5ad file back to float type.""" @@ -81,27 +77,21 @@ def convert2float(adata, columns, var=False): adata.obs[i] = data.copy() -def load_NASC_seq(dir, type="TPM", delimiter="_", colnames=None, dropna=False): - """Function to create an anndata object from NASC-seq pipeline - - Parameters - ---------- - dir: `str` - The directory that points to the NASC-seq pipeline analysis folder (something like /Experimentdir). - type: `str` (default: `TPM`) - The data type that will be used as the gene expression. One of `{'TPM', 'FPKM', 'Reads'}`. - delimiter: `str` (default: `_`) - delimiter pattern for splitting the cells names (columns of each count table) - colnames: `list` or none - The list of column names after splitting the cell names. - dropna: `bool` - Whether to drop all genes that have any np.nan values across all cells. If not, all na values will be filled - as 0. - - Returns - ------- - adata: :class:`~anndata.AnnData` - AnnData object with the `new` and `total` layers. +def load_NASC_seq( + dir: str, type: str = "TPM", delimiter: str = "_", colnames: Optional[List] = None, dropna: bool = False +) -> AnnData: + """Function to create an anndata object from NASC-seq pipeline. + + Args: + dir: The directory that points to the NASC-seq pipeline analysis folder (something like /Experimentdir). 
+ type: The data type that will be used as the gene expression. One of `{'TPM', 'FPKM', 'Reads'}`. + delimiter: Delimiter pattern for splitting the cells names (columns of each count table) + colnames: The list of column names after splitting the cell names. + dropna: Whether to drop all genes that have any np.nan values across all cells. If not, all na values will be + filled as 0. + + Returns: + AnnData object with the `new` and `total` layers. """ import glob @@ -237,15 +227,11 @@ def load_NASC_seq(dir, type="TPM", delimiter="_", colnames=None, dropna=False): def aggregate_adata(file_list: list) -> AnnData: """Aggregate gene expression from adata.X or layer for a list of adata based on the same cell and gene names. - Parameters - ---------- - file_list: - A list of strings specifies the link to the anndata object. + Args: + file_list: A list of strings specifies the link to the anndata object. - Returns - ------- - agg_adata: - Aggregated adata object. + Returns: + Aggregated adata object. """ import anndata @@ -284,8 +270,17 @@ def aggregate_adata(file_list: list) -> AnnData: return agg_adata -def cleanup(adata, del_prediction=False, del_2nd_moments=False): - """clean up adata before saving it to a file""" +def cleanup(adata: AnnData, del_prediction: bool = False, del_2nd_moments: bool = False) -> AnnData: + """Clean up adata before saving it to a file. + + Args: + adata: The anndata object to be cleaned up. + del_prediction: Whether to delete the prediction from the adata object. + del_2nd_moments: Whether to delete the 2nd moments from the adata object. + + Returns: + The cleaned up anndata object. + """ if "pca_fit" in adata.uns_keys(): adata.uns["pca_fit"] = None @@ -327,7 +322,9 @@ def cleanup(adata, del_prediction=False, del_2nd_moments=False): return adata -def export_rank_xlsx(adata, path="rank_info.xlsx", ext="excel", rank_prefix="rank"): +def export_rank_xlsx( + adata: AnnData, path: str = "rank_info.xlsx", ext: str = "excel", rank_prefix: str = "rank" +) -> None: import pandas as pd with pd.ExcelWriter(path) as writer: diff --git a/dynamo/dynamo_logger.py b/dynamo/dynamo_logger.py index 20a3c90fd..666d337c0 100644 --- a/dynamo/dynamo_logger.py +++ b/dynamo/dynamo_logger.py @@ -1,3 +1,5 @@ +from typing import Iterable, Optional + import functools import logging import sys @@ -5,28 +7,39 @@ from contextlib import contextmanager -def silence_logger(name): +def silence_logger(name: str) -> None: """Given a logger name, silence it completely. - :param name: name of the logger - :type name: str + Args: + name: Name of the logger """ package_logger = logging.getLogger(name) package_logger.setLevel(logging.CRITICAL + 100) package_logger.propagate = False -def set_logger_level(name, level): - """Given a logger name, silence it completely. +def set_logger_level(name: str, level: int) -> None: + """Given a logger name, set its logging level. - :param name: name of the logger - :type name: str + Args: + name: Name of the logger """ package_logger = logging.getLogger(name) package_logger.setLevel(level) -def format_logging_message(msg, logging_level, indent_level=1, indent_space_num=6): +def format_logging_message(msg: str, logging_level: int, indent_level: int = 1, indent_space_num: int = 6) -> str: + """Format the logging message with the logging level and indentation. + + Args: + msg: The logging message. + logging_level: The logging level. + indent_level: The indentation level. + indent_space_num: The number of spaces for each indentation level. 
+ + Returns: + The formatted logging message. + """ indent_str = "-" * indent_space_num prefix = indent_str * indent_level prefix = "|" + prefix[1:] @@ -47,7 +60,13 @@ class Logger: FORMAT = "%(message)s" - def __init__(self, namespace="main", level=None): + def __init__(self, namespace: str = "main", level: Optional[int] = None) -> None: + """Initialize the logger. + + Args: + namespace: The namespace for the logger. + level: The logging level. + """ self.namespace = namespace self.logger = logging.getLogger(namespace) @@ -80,12 +99,12 @@ def __init__(self, namespace="main", level=None): else: self.logger.setLevel(logging.INFO) - def namespaced(self, namespace): + def namespaced(self, namespace: str): """Function decorator to set the logging namespace for the duration of the function. - :param namespace: the namespace - :type namespace: str + Args: + namespace: The namespace. """ def wrapper(func): @@ -103,56 +122,63 @@ def inner(*args, **kwargs): return wrapper @contextmanager - def namespaced_context(self, namespace): + def namespaced_context(self, namespace: str) -> None: """Context manager to set the logging namespace. - :param namespace: the namespace - :type namespace: str + Args: + namespace: The namespace. """ previous = self.namespace self.namespace = namespace yield self.namespace = previous - def namespace_message(self, message): + def namespace_message(self, message: str) -> str: """Add namespace information at the beginning of the logging message. - :param message: the logging message - :type message: str - - :return: namespaced message - :rtype: string + Args: + message: the logging message """ return f"[{self.namespace}] {message}" - def setLevel(self, *args, **kwargs): + def setLevel(self, *args, **kwargs) -> None: + """Set the logging level.""" return self.logger.setLevel(*args, **kwargs) - def debug(self, message, indent_level=1, *args, **kwargs): + def debug(self, message: str, indent_level: int = 1, *args, **kwargs) -> None: + """Log a debug message.""" message = format_logging_message(message, logging.DEBUG, indent_level=indent_level) return self.logger.debug(message, *args, **kwargs) - def info(self, message, indent_level=1, *args, **kwargs): + def info(self, message: str, indent_level: int = 1, *args, **kwargs) -> None: + """Log an info message.""" message = format_logging_message(message, logging.INFO, indent_level=indent_level) return self.logger.info(message, *args, **kwargs) - def warning(self, message, indent_level=1, *args, **kwargs): + def warning(self, message: str, indent_level: int = 1, *args, **kwargs) -> None: + """Log a warning message.""" message = format_logging_message(message, logging.WARNING, indent_level=indent_level) return self.logger.warning(message, *args, **kwargs) - def exception(self, message, indent_level=1, *args, **kwargs): + def exception(self, message: str, indent_level: int = 1, *args, **kwargs) -> None: + """Log an exception message.""" message = format_logging_message(message, logging.ERROR, indent_level=indent_level) return self.logger.exception(message, *args, **kwargs) - def critical(self, message, indent_level=1, *args, **kwargs): + def critical(self, message: str, indent_level: int = 1, *args, **kwargs) -> None: + """Log a critical message.""" message = format_logging_message(message, logging.CRITICAL, indent_level=indent_level) return self.logger.critical(message, *args, **kwargs) - def error(self, message, indent_level=1, *args, **kwargs): + def error(self, message: str, indent_level: int = 1, *args, **kwargs) -> None: + 
"""Log an error message.""" message = format_logging_message(message, logging.ERROR, indent_level=indent_level) return self.logger.error(message, *args, **kwargs) - def info_insert_adata(self, key, adata_attr="obsm", log_level=logging.NOTSET, indent_level=1, *args, **kwargs): + def info_insert_adata( + self, key: str, adata_attr: str = "obsm", log_level: int = logging.NOTSET, indent_level: int = 1, *args, **kwargs + ) -> None: + """Log a message for inserting data into an AnnData object.""" message = " %s to %s in AnnData Object." % (key, adata_attr) if log_level == logging.NOTSET or log_level == logging.DEBUG: self.debug(message, indent_level=indent_level, *args, **kwargs) @@ -167,28 +193,40 @@ def info_insert_adata(self, key, adata_attr="obsm", log_level=logging.NOTSET, in else: raise NotImplementedError - def info_insert_adata_var(self, key, log_level, indent_level, *args, **kwargs): + def info_insert_adata_var(self, key: str, log_level: int, indent_level: int, *args, **kwargs) -> None: + """Log a message for inserting data into the .var of an AnnData object.""" return self.info_insert_adata( self, key, adata_attr="var", log_level=log_level, indent_level=indent_level, *args, **kwargs ) - def info_insert_adata_obsm(self, key, log_level, indent_level, *args, **kwargs): + def info_insert_adata_obsm(self, key: str, log_level: int, indent_level: int, *args, **kwargs) -> None: + """Log a message for inserting data into the .obsm of an AnnData object.""" return self.info_insert_adata( self, key, adata_attr="obsm", log_level=log_level, indent_level=indent_level, *args, **kwargs ) - def info_insert_adata_uns(self, key, log_level, indent_level, *args, **kwargs): + def info_insert_adata_uns(self, key: str, log_level: int, indent_level: int, *args, **kwargs) -> None: + """Log a message for inserting data into the .uns of an AnnData object.""" return self.info_insert_adata( self, key, adata_attr="uns", log_level=log_level, indent_level=indent_level, *args, **kwargs ) - def log_time(self): + def log_time(self) -> float: + """Log the current time and return the time passed since the last log.""" now = time.time() self.time_passed = now - self.previous_timestamp self.previous_timestamp = now return self.time_passed - def report_progress(self, percent=None, count=None, total=None, progress_name="", indent_level=1): + def report_progress( + self, + percent: Optional[float] = None, + count: Optional[int] = None, + total: Optional[int] = None, + progress_name: str = "", + indent_level: int = 1, + ) -> None: + """Report the progress of a task.""" if percent is None: assert (not count is None) and (not total is None) percent = count / total * 100 @@ -203,7 +241,8 @@ def report_progress(self, percent=None, count=None, total=None, progress_name="" self.logger_stream_handler.flush() self.logger_stream_handler.terminator = saved_terminator - def finish_progress(self, progress_name="", time_unit="s", indent_level=1): + def finish_progress(self, progress_name: str = "", time_unit: str = "s", indent_level: int = 1) -> None: + """Finish the progress of a task.""" self.log_time() # self.report_progress(percent=100, progress_name=progress_name) @@ -226,14 +265,10 @@ def request_report_hook(self, bn: int, rs: int, ts: int) -> None: The reporthook argument should be a callable that accepts a block number, a read size, and the total file size of the URL target. The data argument should be valid URL encoded data. 
- Parameters - ---------- - bs : - block number - rs : - read size - ts : - total size + Args: + bs: block number. + rs: read size. + ts: total size. """ if self.report_hook_percent_state is None: self.report_hook_percent_state = 0 @@ -252,6 +287,7 @@ def request_report_hook(self, bn: int, rs: int, ts: int) -> None: class LoggerManager: + """A manager for Dynamo-specific loggers.""" DEBUG = logging.DEBUG INFO = logging.INFO @@ -262,19 +298,25 @@ class LoggerManager: temp_timer_logger = Logger("dynamo-temp-timer-logger") @staticmethod - def get_main_logger(): + def get_main_logger() -> Logger: + """Get the main logger.""" return LoggerManager.main_logger @staticmethod - def gen_logger(namespace: str): + def gen_logger(namespace: str) -> Logger: + """Generate a logger with a given namespace.""" return Logger(namespace) @staticmethod - def get_temp_timer_logger(): + def get_temp_timer_logger() -> Logger: + """Get the temporary timer logger.""" return LoggerManager.temp_timer_logger @staticmethod - def progress_logger(generator, logger=None, progress_name="", indent_level=1): + def progress_logger( + generator: Iterable, logger: Optional[Logger] = None, progress_name: str = "", indent_level: int = 1, + ) -> Iterable: + """A generator that logs the progress of another generator.""" if logger is None: logger = LoggerManager.get_temp_timer_logger() iterator = iter(generator) @@ -294,81 +336,97 @@ def progress_logger(generator, logger=None, progress_name="", indent_level=1): logger.finish_progress(progress_name=progress_name, indent_level=indent_level) -def main_info(message, indent_level=1): +def main_info(message: str, indent_level: int = 1) -> None: + """Log an info message.""" LoggerManager.main_logger.info(message, indent_level) -def main_debug(message, indent_level=1): +def main_debug(message: str, indent_level: int = 1) -> None: + """Log a debug message.""" LoggerManager.main_logger.debug(message, indent_level) -def main_warning(message, indent_level=1): +def main_warning(message: str, indent_level: int = 1) -> None: + """Log a warning message.""" LoggerManager.main_logger.warning(message, indent_level) -def main_exception(message, indent_level=1): +def main_exception(message: str, indent_level: int = 1) -> None: + """Log an exception message.""" LoggerManager.main_logger.exception(message, indent_level) -def main_critical(message, indent_level=1): +def main_critical(message: str, indent_level: int = 1) -> None: + """Log a critical message.""" LoggerManager.main_logger.critical(message, indent_level) -def main_tqdm(generator, desc="", indent_level=1, logger=LoggerManager().main_logger): +def main_tqdm( + generator: Iterable, desc: str = "", indent_level: int = 1, logger: LoggerManager = LoggerManager().main_logger, +) -> Iterable: """a TQDM style wrapper for logging something like a loop. - e.g. - for item in main_tqdm(alist, desc=""): - do something - - Parameters - ---------- - generator : [type] - same as what you put in tqdm - desc : str, optional - description of your progress + + Args: + generator: The generator you want to log. + desc: Description of your progress. + + Examples: + >>> for item in main_tqdm(alist, desc=""): + ... 
do something
     """
     return LoggerManager.progress_logger(generator, logger=logger, progress_name=desc, indent_level=indent_level)
 
 
-def main_log_time():
+def main_log_time() -> None:
+    """Record the current time in the main logger."""
     LoggerManager.main_logger.log_time()
 
 
-def main_silence():
+def main_silence() -> None:
+    """Silence the main logger."""
     LoggerManager.main_logger.setLevel(logging.CRITICAL + 100)
 
 
-def main_finish_progress(progress_name=""):
+def main_finish_progress(progress_name: str = "") -> None:
+    """Finish the progress of a task."""
     LoggerManager.main_logger.finish_progress(progress_name=progress_name)
 
 
-def main_info_insert_adata(key, adata_attr="obsm", indent_level=1, *args, **kwargs):
+def main_info_insert_adata(key: str, adata_attr: str = "obsm", indent_level: int = 1, *args, **kwargs) -> None:
+    """Log a message for inserting data into an AnnData object."""
     LoggerManager.main_logger.info_insert_adata(key, adata_attr=adata_attr, indent_level=indent_level, *args, **kwargs)
 
 
-def main_info_insert_adata_var(key, indent_level=1, *args, **kwargs):
+def main_info_insert_adata_var(key: str, indent_level: int = 1, *args, **kwargs) -> None:
+    """Log a message for inserting data into the .var of an AnnData object."""
     main_info_insert_adata(key, "var", indent_level, *args, **kwargs)
 
 
-def main_info_insert_adata_uns(key, indent_level=1, *args, **kwargs):
+def main_info_insert_adata_uns(key: str, indent_level: int = 1, *args, **kwargs) -> None:
+    """Log a message for inserting data into the .uns of an AnnData object."""
     main_info_insert_adata(key, "uns", indent_level, *args, **kwargs)
 
 
-def main_info_insert_adata_obsm(key, indent_level=1, *args, **kwargs):
+def main_info_insert_adata_obsm(key: str, indent_level: int = 1, *args, **kwargs) -> None:
+    """Log a message for inserting data into the .obsm of an AnnData object."""
     main_info_insert_adata(key, "obsm", indent_level, *args, **kwargs)
 
 
-def main_info_insert_adata_obs(key, indent_level=1, *args, **kwargs):
+def main_info_insert_adata_obs(key: str, indent_level: int = 1, *args, **kwargs) -> None:
+    """Log a message for inserting data into the .obs of an AnnData object."""
     main_info_insert_adata(key, "obs", indent_level, *args, **kwargs)
 
 
-def main_info_insert_adata_layer(key, indent_level=1, *args, **kwargs):
+def main_info_insert_adata_layer(key: str, indent_level: int = 1, *args, **kwargs) -> None:
+    """Log a message for inserting data into the .layers of an AnnData object."""
     main_info_insert_adata(key, "layers", indent_level, *args, **kwargs)
 
 
-def main_info_verbose_timeit(msg):
+def main_info_verbose_timeit(msg: str) -> None:
+    """Log a verbose timeit message through the main logger."""
     LoggerManager.main_logger.info(msg)
 
 
-def main_set_level(level):
+def main_set_level(level: int) -> None:
+    """Set the logging level of the main logger."""
     set_logger_level("dynamo", level)
diff --git a/dynamo/get_version.py b/dynamo/get_version.py
index e3108e6db..94f24ed7d 100755
--- a/dynamo/get_version.py
+++ b/dynamo/get_version.py
@@ -18,7 +18,8 @@
 ON_RTD = os.environ.get("READTHEDOCS") == "True"
 
 
-def match_groups(regex, target):
+def match_groups(regex: str, target: str) -> List[str]:
+    """Match a regex and return the groups as a list. Raise an error if the regex does not match."""
     match = re.match(regex, target)
     if match is None:
         raise re.error(f"Regex does not match “{target}”. 
RE Pattern: {regex}", regex) @@ -26,6 +27,7 @@ def match_groups(regex, target): class Version(NamedTuple): + """A parsed version string.""" release: str dev: Optional[str] labels: List[str] @@ -42,8 +44,8 @@ def __str__(self): return f"{release}{dev}{labels}" -def get_version_from_dirname(name, parent): - """Extracted sdist""" +def get_version_from_dirname(name: str, parent: Path) -> Optional[Version]: + """Extracted sdist.""" parent = parent.resolve() re_dirname = re.compile(f"{name}-{RE_VERSION}$") @@ -53,7 +55,8 @@ def get_version_from_dirname(name, parent): return Version.parse(parent.name[len(name) + 1 :]) -def get_version_from_git(parent): +def get_version_from_git(parent: Path) -> Optional[Version]: + """Get the version from git describe.""" parent = parent.resolve() try: @@ -102,7 +105,8 @@ def get_version_from_git(parent): return Version(release, dev, labels) -def get_version_from_metadata(name: str, parent: Optional[Path] = None): +def get_version_from_metadata(name: str, parent: Optional[Path] = None) -> Optional[Version]: + """Get the version from the package metadata.""" try: from pkg_resources import DistributionNotFound, get_distribution except ImportError: @@ -128,16 +132,20 @@ def get_version_from_metadata(name: str, parent: Optional[Path] = None): def get_version(package: Union[Path, str]) -> str: - """Get the version of a package or module - Pass a module path or package name. - The former is recommended, since it also works for not yet installed packages. + """Get the version of a package or module. + + Pass a module path or package name. The former is recommended, since it also works for not yet installed packages. Supports getting the version from - #. The directory name (as created by ``setup.py sdist``) - #. The output of ``git describe`` - #. The package metadata of an installed package - (This is the only possibility when passing a name) + #. The directory name (as created by ``setup.py sdist``) + #. The output of ``git describe`` + #. The package metadata of an installed package + (This is the only possibility when passing a name) + Args: package: package name or module path (``…/module.py`` or ``…/module/__init__.py``) + + Returns: + The version string. """ path = Path(package) if not path.suffix and len(path.parts) == 1: # Is probably not a path @@ -165,7 +173,8 @@ def get_version(package: Union[Path, str]) -> str: ) -def get_dynamo_version(): +def get_dynamo_version() -> Optional[str]: + """Get the version of Dynamo.""" import pkg_resources try: @@ -179,7 +188,8 @@ def get_dynamo_version(): def get_all_dependencies_version(display=True): - """ + """Get the version of all dependencies of Dynamo. 
+ Adapted from answer 2 in https://stackoverflow.com/questions/40428931/package-for-listing-version-of-packages-used-in-a-jupyter-notebook """ @@ -210,6 +220,7 @@ def get_all_dependencies_version(display=True): def session_info(): + """Show the versions of all dependencies of the current environment by session_info.""" try: import session_info except: diff --git a/dynamo/sample_data.py b/dynamo/sample_data.py index 43181e603..912927f82 100755 --- a/dynamo/sample_data.py +++ b/dynamo/sample_data.py @@ -1,15 +1,18 @@ +from typing import Optional + import ntpath import os from pathlib import Path from urllib.request import urlretrieve import pandas as pd -from anndata import read_h5ad, read_loom +from anndata import AnnData, read_h5ad, read_loom from .dynamo_logger import LoggerManager, main_info, main_log_time -def download_data(url, file_path=None, dir="./data"): +def download_data(url: str, file_path: Optional[str] = None, dir: str = "./data") -> str: + """Download example data to local folder.""" file_path = ntpath.basename(url) if file_path is None else file_path file_path = os.path.join(dir, file_path) main_info("Downloading data to " + file_path) @@ -26,18 +29,15 @@ def download_data(url, file_path=None, dir="./data"): return file_path -def get_adata(url, filename=None): +def get_adata(url: str, filename: Optional[str] = None) -> Optional[AnnData]: """Download example data to local folder. - Parameters - ---------- - url: - filename + Args: + url: the url of the data. + filename: the name of the file to be saved. - Returns - ------- - adata: :class:`~anndata.AnnData` - an Annodata object. + Returns: + An Annodata object. """ try: @@ -86,15 +86,12 @@ def scifate(): def scNT_seq_neuron_splicing( - url="https://www.dropbox.com/s/g1afqdcsczgyj2m/neuron_splicing_4_11.h5ad?dl=1", - filename="neuron_splicing.h5ad", -): + url: str = "https://www.dropbox.com/s/g1afqdcsczgyj2m/neuron_splicing_4_11.h5ad?dl=1", + filename: str = "neuron_splicing.h5ad", +) -> AnnData: """The neuron splicing data is from Qiu, et al (2020). - This data consists of 44,021 genes across 13,476 cells. - Returns - ------- - Returns `adata` object + This data consists of 44,021 genes across 13,476 cells. """ adata = get_adata(url, filename) @@ -102,15 +99,12 @@ def scNT_seq_neuron_splicing( def scNT_seq_neuron_labeling( - url="https://www.dropbox.com/s/lk9cl63yd28mfuq/neuron_labeling.h5ad?dl=1", - filename="neuron_labeling.h5ad", -): + url: str = "https://www.dropbox.com/s/lk9cl63yd28mfuq/neuron_labeling.h5ad?dl=1", + filename: str = "neuron_labeling.h5ad", +) -> AnnData: """The neuron splicing data is from Qiu, et al (2020). - This data consists of 24, 078 genes across 3,060 cells. - Returns - ------- - Returns `adata` object + This data consists of 24, 078 genes across 3,060 cells. """ adata = get_adata(url, filename) @@ -122,15 +116,12 @@ def cite_seq(): def zebrafish( - url="https://www.dropbox.com/scl/fi/3zt89ee0j5twxk4ttzmij/zebrafish.h5ad?rlkey=phwg0b7aqiizd9kf69l2kciak&dl=1", - filename="zebrafish.h5ad", -): + url: str = "https://www.dropbox.com/scl/fi/3zt89ee0j5twxk4ttzmij/zebrafish.h5ad?rlkey=phwg0b7aqiizd9kf69l2kciak&dl=1", + filename: str = "zebrafish.h5ad", +) -> AnnData: """The zebrafish is from Saunders, et al (2019). - This data consists of 16,940 genes across 4,181 cells. - Returns - ------- - Returns `adata` object + This data consists of 16,940 genes across 4,181 cells. 
""" adata = get_adata(url, filename) @@ -138,17 +129,13 @@ def zebrafish( def DentateGyrus( - url="http://pklab.med.harvard.edu/velocyto/DentateGyrus/DentateGyrus.loom", - filename=None, -): + url: str = "http://pklab.med.harvard.edu/velocyto/DentateGyrus/DentateGyrus.loom", + filename: Optional[str] = None, +) -> AnnData: """The Dentate Gyrus dataset used in https://github.com/velocyto-team/velocyto-notebooks/blob/master/python/DentateGyrus.ipynb. - This data consists of 27,998 genes across 18,213 cells. + This data consists of 27,998 genes across 18,213 cells. Note this one http://pklab.med.harvard.edu/velocyto/DG1/10X43_1.loom: a subset of the above data. - - Returns - ------- - Returns `adata` object """ adata = get_adata(url, filename) @@ -156,15 +143,12 @@ def DentateGyrus( def Haber( - url="http://pklab.med.harvard.edu/velocyto/Haber_et_al/Haber_et_al.loom", - filename=None, -): + url: str = "http://pklab.med.harvard.edu/velocyto/Haber_et_al/Haber_et_al.loom", + filename: Optional[str] = None, +) -> AnnData: """The Haber dataset used in https://github.com/velocyto-team/velocyto-notebooks/blob/master/python/Haber_et_al.ipynb - This data consists of 27,998 genes across 7,216 cells. - Returns - ------- - Returns `adata` object + This data consists of 27,998 genes across 7,216 cells. """ adata = get_adata(url, filename) urlretrieve( @@ -178,15 +162,12 @@ def Haber( def hgForebrainGlutamatergic( - url="http://pklab.med.harvard.edu/velocyto/hgForebrainGlut/hgForebrainGlut.loom", - filename=None, -): + url: str = "http://pklab.med.harvard.edu/velocyto/hgForebrainGlut/hgForebrainGlut.loom", + filename: Optional[str] = None, +) -> AnnData: """The hgForebrainGlutamatergic dataset used in https://github.com/velocyto-team/velocyto-notebooks/blob/master/python/hgForebrainGlutamatergic.ipynb - This data consists of 32,738 genes across 1,720 cells. - Returns - ------- - Returns `adata` object + This data consists of 32,738 genes across 1,720 cells. """ adata = get_adata(url, filename) urlretrieve( @@ -200,15 +181,12 @@ def hgForebrainGlutamatergic( def chromaffin( - url="https://www.dropbox.com/s/awevuz836tlclvw/onefilepercell_A1_unique_and_others_J2CH1.loom?dl=1", - filename="onefilepercell_A1_unique_and_others_J2CH1.loom", -): # + url: str = "https://www.dropbox.com/s/awevuz836tlclvw/onefilepercell_A1_unique_and_others_J2CH1.loom?dl=1", + filename: str = "onefilepercell_A1_unique_and_others_J2CH1.loom", +) -> AnnData: # """The chromaffin dataset used in http://pklab.med.harvard.edu/velocyto/notebooks/R/chromaffin2.nb.html - This data consists of 32,738 genes across 1,720 cells. - Returns - ------- - Returns `adata` object + This data consists of 32,738 genes across 1,720 cells. """ adata = get_adata(url, filename) @@ -218,15 +196,12 @@ def chromaffin( def BM( - url="http://pklab.med.harvard.edu/velocyto/mouseBM/SCG71.loom", - filename=None, -): + url: str = "http://pklab.med.harvard.edu/velocyto/mouseBM/SCG71.loom", + filename: Optional[str] = None, +) -> AnnData: """The BM dataset used in http://pklab.med.harvard.edu/velocyto/notebooks/R/SCG71.nb.html - This data consists of 24,421genes across 6,667 cells. - Returns - ------- - Returns `adata` object + This data consists of 24,421genes across 6,667 cells. 
""" adata = get_adata(url, filename) @@ -235,17 +210,13 @@ def BM( def pancreatic_endocrinogenesis( - url="https://github.com/theislab/scvelo_notebooks/raw/master/data/Pancreas/endocrinogenesis_day15.h5ad", - filename=None, -): + url: str ="https://github.com/theislab/scvelo_notebooks/raw/master/data/Pancreas/endocrinogenesis_day15.h5ad", + filename: Optional[str] = None, +) -> AnnData: """Pancreatic endocrinogenesis. Data from scvelo - Pancreatic epithelial and Ngn3-Venus fusion (NVF) cells during secondary transition / embryonic day 15.5. - https://dev.biologists.org/content/146/12/dev173849 - - Returns - ------- - Returns `adata` object + Pancreatic epithelial and Ngn3-Venus fusion (NVF) cells during secondary transition / embryonic day 15.5. + https://dev.biologists.org/content/146/12/dev173849 """ adata = get_adata(url, filename) @@ -254,17 +225,13 @@ def pancreatic_endocrinogenesis( def DentateGyrus_scvelo( - url="https://www.dropbox.com/s/3w1wzb0b68fhdsw/dentategyrus_scv.h5ad?dl=1", - filename="dentategyrus_scv.h5ad", -): + url: str = "https://www.dropbox.com/s/3w1wzb0b68fhdsw/dentategyrus_scv.h5ad?dl=1", + filename: str = "dentategyrus_scv.h5ad", +) -> AnnData: """The Dentate Gyrus dataset used in https://github.com/theislab/scvelo_notebooks/tree/master/data/DentateGyrus. - This data consists of 13,913 genes across 2,930 cells. - Note this dataset is the same processed dataset from the excellent scVelo package, which is a subset of the DentateGyrus dataset. - - Returns - ------- - Returns `adata` object + This data consists of 13,913 genes across 2,930 cells. Note this dataset is the same processed dataset from the + excellent scVelo package, which is a subset of the DentateGyrus dataset. """ adata = get_adata(url, filename) @@ -275,13 +242,9 @@ def scEU_seq_rpe1( url: str = "https://www.dropbox.com/s/25enev458c8egn7/rpe1.h5ad?dl=1", filename: str = "rpe1.h5ad", ): - """ - Download rpe1 dataset from Battich, et al (2020) via Dropbox link. - This data consists of 13,913 genes across 2,930 cells. + """Download rpe1 dataset from Battich, et al (2020) via Dropbox link. - Returns - ------- - Returns `adata` object + This data consists of 13,913 genes across 2,930 cells. """ main_info("Downloading scEU_seq data") adata = get_adata(url, filename) @@ -292,13 +255,9 @@ def scEU_seq_organoid( url: str = "https://www.dropbox.com/s/es7sroy5ceb7wwz/organoid.h5ad?dl=1", filename: str = "organoid.h5ad", ): - """ - Download organoid dataset from Battich, et al (2020) via Dropbox link. - This data consists of 9,157 genes across 3,831 cells. + """Download organoid dataset from Battich, et al (2020) via Dropbox link. - Returns - ------- - Returns `adata` object + This data consists of 9,157 genes across 3,831 cells. 
""" main_info("Downloading scEU_seq data") adata = get_adata(url, filename) @@ -310,8 +269,8 @@ def hematopoiesis( # url: str = "https://pitt.box.com/shared/static/kyh3s4wrxdywupn9wk9r2j27vzlvk8vf.h5ad", # with box # url: str = "https://pitt.box.com/shared/static/efqa8icu1m6d1ghfcc3s9tj0j91pky1h.h5ad", # v0: umap_ori version filename: str = "hematopoiesis.h5ad", -): - """https://pitt.box.com/v/hematopoiesis-processed""" +) -> AnnData: + """Processed dataset originally from https://pitt.box.com/v/hematopoiesis-processed.""" main_info("Downloading processed hematopoiesis adata") adata = get_adata(url, filename) return adata @@ -321,14 +280,18 @@ def hematopoiesis_raw( url: str = "https://www.dropbox.com/s/rvkxvq8694xnxz3/hsc_raw_with_metadata.h5ad?dl=1", # url: str = "https://pitt.box.com/shared/static/bv7q0kgxjncc5uoget5wvmi700xwntje.h5ad", # with box filename: str = "hematopoiesis_raw.h5ad", -): - """https://pitt.box.com/v/hematopoiesis-processed""" +) -> AnnData: + """Processed dataset originally from https://pitt.box.com/v/hematopoiesis-processed.""" main_info("Downloading raw hematopoiesis adata") adata = get_adata(url, filename) return adata -def human_tfs(url="https://www.dropbox.com/scl/fi/pyocgrhvglg6p7q8yf9ol/human_tfs.txt?rlkey=kbc8vfzf72f8ez0xldrb5nb2d&dl=1", filename="human_tfs.txt"): +def human_tfs( + url: str = "https://www.dropbox.com/scl/fi/pyocgrhvglg6p7q8yf9ol/human_tfs.txt?rlkey=kbc8vfzf72f8ez0xldrb5nb2d&dl=1", + filename: str = "human_tfs.txt", +) -> pd.DataFrame: + """Download human transcription factors.""" file_path = download_data(url, filename) tfs = pd.read_csv(file_path, sep="\t") return tfs diff --git a/dynamo/utils.py b/dynamo/utils.py index a5de63ad5..a37aa4f1a 100644 --- a/dynamo/utils.py +++ b/dynamo/utils.py @@ -1,6 +1,6 @@ """General utility functions """ -from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import anndata import numpy as np @@ -9,26 +9,18 @@ from .dynamo_logger import LoggerManager -def isarray(arr): - """ - Check if a variable is an array. Essentially the variable has the attribute 'len' - and it is not a string. - """ +def isarray(arr: Any) -> bool: + """Check if a variable is an array. Essentially the variable has the attribute 'len' and it is not a string.""" return hasattr(arr, "__len__") and (not isinstance(arr, str) and (not isinstance(arr, type))) -def ismatrix(arr): - """ - Check if a variable is an array. Essentially the variable has the attribute 'len' - and it is not a string. - """ +def ismatrix(arr: Any) -> bool: + """Check if a variable is a matrix. Essentially the variable is `np.matrix` or `sp.issparse`.""" return type(arr) is np.matrix or sp.issparse(arr) -def areinstance(arr, dtype, logic_func=all): - """ - Check if elements of an array are all (by default) of 'dtype'. - """ +def areinstance(arr: Any, dtype: Union[type, List[type]], logic_func: Callable = all) -> bool: + """Check if elements of an array are all (by default) of 'dtype'.""" if not isarray(dtype): dtype = [dtype] ret = None @@ -40,28 +32,28 @@ def areinstance(arr, dtype, logic_func=all): return logic_func(ret) -def copy_adata(adata: anndata.AnnData, logger=None) -> anndata.AnnData: - """wrapper for deep copy adata and log copy operation since it is memory intensive. - - Parameters - ---------- - adata : - An adata object that will be deep copied. 
-    logger : [bool], optional
-        Whether to report logging info
-
-    Examples
-    --------
-    >>> import dynamo as dyn
-    >>> adata = dyn.sample_data.hgForebrainGlutamatergic()
-    >>> original_adata = copy_adata(adata)
-    >>> # now after this statement, adata "points" to a new object, copy of the original
-    >>> adata = copy_adata(adata)
-    >>> adata.X[0, 1] = -999
-    >>> # original_adata unchanged
-    >>> print(original_adata.X[0, 1])
-    >>> # we can use adata = copy_adata(adata) inside a dynammo function when we want to create a adata copy
-    >>> # without worrying about changing the original copy.
+def copy_adata(adata: anndata.AnnData, logger: Optional[LoggerManager] = None) -> anndata.AnnData:
+    """Wrapper for deep copy adata and log copy operation since it is memory intensive.
+
+    Args:
+        adata: An adata object that will be deep copied.
+        logger: The logger used to report the copy operation. If None, the main logger
+            from LoggerManager will be used.
+
+    Returns:
+        The deep copied adata object.
+
+    Examples:
+        >>> import dynamo as dyn
+        >>> adata = dyn.sample_data.hgForebrainGlutamatergic()
+        >>> original_adata = copy_adata(adata)
+        >>> # now after this statement, adata "points" to a new object, copy of the original
+        >>> adata = copy_adata(adata)
+        >>> adata.X[0, 1] = -999
+        >>> # original_adata unchanged
+        >>> print(original_adata.X[0, 1])
+        >>> # we can use adata = copy_adata(adata) inside a dynamo function when we want to create an adata copy
+        >>> # without worrying about changing the original copy.
     """
     if logger is None:
         logger = LoggerManager.get_main_logger()
@@ -73,12 +65,14 @@ def copy_adata(adata: anndata.AnnData, logger=None) -> anndata.AnnData:
     return data
 
 
-def normalize(x):
+def normalize(x: np.ndarray) -> np.ndarray:
+    """Normalize the input array to [0, 1]."""
     x_min = np.min(x)
     return (x - x_min) / (np.max(x) - x_min)
 
 
-def denormalize(y, x_min, x_max):
+def denormalize(y: np.ndarray, x_min: float, x_max: float) -> np.ndarray:
+    """Denormalize the input array from [0, 1] to [x_min, x_max]."""
     return y * (x_max - x_min) + x_min
 
 
@@ -93,10 +87,10 @@ def pca_to_expr(
     """Inverse transform the data with given principal components.
 
     Args:
-        X: raw data to transform.
-        PCs: the principal components.
-        mean: the mean used to fit the PCA.
-        func: additional function to transform the output.
+        X: Raw data to transform.
+        PCs: The principal components.
+        mean: The mean used to fit the PCA.
+        func: Additional function to transform the output.
 
     Returns:
         The inverse transformed data.
@@ -121,10 +115,10 @@ def expr_to_pca(
     """Transform the data with given principal components.
 
     Args:
-        expr: raw data to transform.
-        PCs: the principal components.
-        mean: the mean of expr.
-        func: additional function to transform the output.
+        expr: Raw data to transform.
+        PCs: The principal components.
+        mean: The mean of expr.
+        func: Additional function to transform the output.
 
     Returns:
         The transformed data.
From a59c426746efadb4e78e164175936b5792d640ec Mon Sep 17 00:00:00 2001 From: sichao Date: Tue, 20 Feb 2024 13:56:36 -0500 Subject: [PATCH 2/2] update docstringformat and missing typing --- dynamo/configuration.py | 20 +++++++++++--------- dynamo/data_io.py | 12 ++++++------ dynamo/dynamo_logger.py | 10 +++++----- dynamo/get_version.py | 2 +- dynamo/sample_data.py | 2 +- 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/dynamo/configuration.py b/dynamo/configuration.py index d05c0fc20..465bbf246 100755 --- a/dynamo/configuration.py +++ b/dynamo/configuration.py @@ -53,7 +53,7 @@ def gen_layer_pp_key(*keys): return "_".join(keys) def gen_layer_X_key(key: str) -> str: - """Generate dynamo style keys for adata.layer[X_*], used later in dynamics""" + """Generate dynamo style keys for adata.layer[X_*], used later in dynamics.""" return DynamoAdataKeyManager.gen_new_layer_key("X", key) def is_layer_X_key(key: str) -> bool: @@ -109,7 +109,9 @@ def check_if_layer_exist(adata: AnnData, layer: str) -> bool: return layer in adata.layers - def get_available_layer_keys(adata, layers="all", remove_pp_layers=True, include_protein=True) -> List[str]: + def get_available_layer_keys( + adata: AnnData, layers: str = "all", remove_pp_layers: bool = True, include_protein: bool = True, + ) -> List[str]: """Get the list of available layers' keys. If `layers` is set to all, return a list of all available layers; if `layers` is set to a list, then the intersetion of available layers and `layers` will be returned.""" layer_keys = list(adata.layers.keys()) @@ -271,7 +273,7 @@ def use_default_var_if_none(val: Any, key: str, replace_val: Optional[Any] = Non Args: val: The input value to check against. key: `key` stored in the dynamo configuration. E.g DynamoAdataConfig.RECIPE_MONOCLE_KEEP_RAW_LAYERS_KEY. - replace_val: The target value to replace, by default None + replace_val: The target value to replace, by default None. Returns: `val` or config value set in DynamoAdataConfig according to the method description above. @@ -560,9 +562,9 @@ def config_dynamo_rcParams( Args: background: The background color of the plot. By default we use the white ground which is suitable for producing figures for publication. Setting it to `black` background will be great for presentation. - prop_cycle: A list with hex color codes - fontsize: Size of font - color_map: Color map + prop_cycle: A list with hex color codes. + fontsize: Size of font. + color_map: Color map. frameon: Whether to have frame for the figure. Returns: @@ -708,7 +710,7 @@ def set_figure_params( vector_friendly: Plot scatter plots using `png` backend even when exporting as `pdf` or `svg`. color_map: Convenience method for setting the default color map. format: This sets the default format for saving figures: `file_format_figs`. This can be `png`, `pdf`, `svg`, etc. - transparent: Save figures with transparent back ground. Sets `rcParams['savefig.transparent']`. + transparent: Save figures with transparent background. Sets `rcParams['savefig.transparent']`. ipython_format: Only concerns the notebook/IPython environment; see `IPython.core.display.set_matplotlib_formats` for more details. 
""" @@ -753,7 +755,7 @@ def reset_rcParams(): def set_pub_style(scaler: float = 1) -> None: - """Formatting helper function that can be used to save publishable figures""" + """Formatting helper function that can be used to save publishable figures.""" set_figure_params("dynamo", background="white") matplotlib.use("cairo") matplotlib.rcParams.update({"font.size": 4 * scaler}) @@ -772,7 +774,7 @@ def set_pub_style(scaler: float = 1) -> None: def set_pub_style_mpltex() -> None: - """Formatting helper function based on mpltex package that can be used to save publishable figures""" + """Formatting helper function based on mpltex package that can be used to save publishable figures.""" set_figure_params("dynamo", background="white") matplotlib.use("cairo") # the following code is adapted from https://github.com/liuyxpp/mpltex diff --git a/dynamo/data_io.py b/dynamo/data_io.py index 5e00ebad6..b868e3285 100755 --- a/dynamo/data_io.py +++ b/dynamo/data_io.py @@ -23,11 +23,11 @@ from .tools.Markov import KernelMarkovChain -def make_dir(path: str, can_exist=True) -> bool: - """Wrapper for making directory +def make_dir(path: str, can_exist: bool = True) -> bool: + """Wrapper for making directory. Args: - path: A str or path object + path: A str or path object. can_exist: If path can exist or not. If set to True and path exists, an exception will be raised. Returns: @@ -78,14 +78,14 @@ def convert2float(adata: AnnData, columns: List, var: bool = False) -> None: def load_NASC_seq( - dir: str, type: str = "TPM", delimiter: str = "_", colnames: Optional[List] = None, dropna: bool = False + dir: str, type: str = "TPM", delimiter: str = "_", colnames: Optional[List] = None, dropna: bool = False, ) -> AnnData: """Function to create an anndata object from NASC-seq pipeline. Args: dir: The directory that points to the NASC-seq pipeline analysis folder (something like /Experimentdir). type: The data type that will be used as the gene expression. One of `{'TPM', 'FPKM', 'Reads'}`. - delimiter: Delimiter pattern for splitting the cells names (columns of each count table) + delimiter: Delimiter pattern for splitting the cells names (columns of each count table). colnames: The list of column names after splitting the cell names. dropna: Whether to drop all genes that have any np.nan values across all cells. If not, all na values will be filled as 0. @@ -323,7 +323,7 @@ def cleanup(adata: AnnData, del_prediction: bool = False, del_2nd_moments: bool def export_rank_xlsx( - adata: AnnData, path: str = "rank_info.xlsx", ext: str = "excel", rank_prefix: str = "rank" + adata: AnnData, path: str = "rank_info.xlsx", ext: str = "excel", rank_prefix: str = "rank", ) -> None: import pandas as pd diff --git a/dynamo/dynamo_logger.py b/dynamo/dynamo_logger.py index 666d337c0..9d766a469 100644 --- a/dynamo/dynamo_logger.py +++ b/dynamo/dynamo_logger.py @@ -11,7 +11,7 @@ def silence_logger(name: str) -> None: """Given a logger name, silence it completely. Args: - name: Name of the logger + name: Name of the logger. """ package_logger = logging.getLogger(name) package_logger.setLevel(logging.CRITICAL + 100) @@ -22,7 +22,7 @@ def set_logger_level(name: str, level: int) -> None: """Given a logger name, set its logging level. Args: - name: Name of the logger + name: Name of the logger. """ package_logger = logging.getLogger(name) package_logger.setLevel(level) @@ -137,7 +137,7 @@ def namespace_message(self, message: str) -> str: """Add namespace information at the beginning of the logging message. 
Args: - message: the logging message + message: the logging message. """ return f"[{self.namespace}] {message}" @@ -262,8 +262,8 @@ def finish_progress(self, progress_name: str = "", time_unit: str = "s", indent_ def request_report_hook(self, bn: int, rs: int, ts: int) -> None: """A callback required by the request lib: - The reporthook argument should be a callable that accepts a block number, a read size, and the - total file size of the URL target. The data argument should be valid URL encoded data. + The reporthook argument should be a callable that accepts a block number, a read size, and the + total file size of the URL target. The data argument should be valid URL encoded data. Args: bs: block number. diff --git a/dynamo/get_version.py b/dynamo/get_version.py index 94f24ed7d..9ce2daf1b 100755 --- a/dynamo/get_version.py +++ b/dynamo/get_version.py @@ -187,7 +187,7 @@ def get_dynamo_version() -> Optional[str]: return version -def get_all_dependencies_version(display=True): +def get_all_dependencies_version(display: bool = True): """Get the version of all dependencies of Dynamo. Adapted from answer 2 in diff --git a/dynamo/sample_data.py b/dynamo/sample_data.py index 912927f82..6260c413b 100755 --- a/dynamo/sample_data.py +++ b/dynamo/sample_data.py @@ -213,7 +213,7 @@ def pancreatic_endocrinogenesis( url: str ="https://github.com/theislab/scvelo_notebooks/raw/master/data/Pancreas/endocrinogenesis_day15.h5ad", filename: Optional[str] = None, ) -> AnnData: - """Pancreatic endocrinogenesis. Data from scvelo + """Pancreatic endocrinogenesis. Data from scvelo. Pancreatic epithelial and Ngn3-Venus fusion (NVF) cells during secondary transition / embryonic day 15.5. https://dev.biologists.org/content/146/12/dev173849