diff --git a/alphadia/calibration/property.py b/alphadia/calibration/property.py index 5cb6aec5..1391651f 100644 --- a/alphadia/calibration/property.py +++ b/alphadia/calibration/property.py @@ -22,9 +22,9 @@ def __init__( self, name: str = "", function: object = None, - input_columns: list[str] = [], - target_columns: list[str] = [], - output_columns: list[str] = [], + input_columns: list[str] | None = None, + target_columns: list[str] | None = None, + output_columns: list[str] | None = None, transform_deviation: None | float = None, **kwargs, ): @@ -59,7 +59,12 @@ def __init__( If set to None, the deviation is expressed in absolute units. """ - + if output_columns is None: + output_columns = [] + if target_columns is None: + target_columns = [] + if input_columns is None: + input_columns = [] self.name = name self.function = function self.input_columns = input_columns diff --git a/alphadia/fdr.py b/alphadia/fdr.py index 7f9ba85d..a365f467 100644 --- a/alphadia/fdr.py +++ b/alphadia/fdr.py @@ -172,7 +172,7 @@ def perform_fdr( def keep_best( df: pd.DataFrame, score_column: str = "proba", - group_columns: list[str] = ["channel", "precursor_idx"], + group_columns: list[str] | None = None, ): """Keep the best PSM for each group of PSMs with the same precursor_idx. This function is used to select the best candidate PSM for each precursor. @@ -196,6 +196,8 @@ def keep_best( pd.DataFrame The dataframe containing the best PSM for each group. 
""" + if group_columns is None: + group_columns = ["channel", "precursor_idx"] temp_df = df.reset_index(drop=True) temp_df = temp_df.sort_values(score_column, ascending=True) temp_df = temp_df.groupby(group_columns).head(1) diff --git a/alphadia/fdrexperimental.py b/alphadia/fdrexperimental.py index 899f4322..733c6d9b 100644 --- a/alphadia/fdrexperimental.py +++ b/alphadia/fdrexperimental.py @@ -127,7 +127,7 @@ def __init__( epochs: int = 10, learning_rate: float = 0.0002, weight_decay: float = 0.00001, - layers: list[int] = [100, 50, 20, 5], + layers: list[int] | None = None, dropout: float = 0.001, calculate_metrics: bool = True, metric_interval: int = 1, @@ -186,6 +186,8 @@ def __init__( Whether to use GPU acceleration if available. """ + if layers is None: + layers = [100, 50, 20, 5] self.test_size = test_size self.max_batch_size = max_batch_size self.min_batch_number = min_batch_number @@ -605,7 +607,7 @@ def __init__( epochs: int = 10, learning_rate: float = 0.0002, weight_decay: float = 0.00001, - layers: list[int] = [100, 50, 20, 5], + layers: list[int] | None = None, dropout: float = 0.001, metric_interval: int = 1000, **kwargs, @@ -646,7 +648,8 @@ def __init__( Interval for logging metrics during training. """ - + if layers is None: + layers = [100, 50, 20, 5] self.test_size = test_size self.batch_size = batch_size self.epochs = epochs @@ -919,7 +922,7 @@ def __init__( epochs: int = 10, learning_rate: float = 0.0002, weight_decay: float = 0.00001, - layers: list[int] = [100, 50, 20, 5], + layers: list[int] | None = None, dropout: float = 0.001, metric_interval: int = 1000, **kwargs, @@ -960,7 +963,8 @@ def __init__( Interval for logging metrics during training. 
""" - + if layers is None: + layers = [100, 50, 20, 5] self.test_size = test_size self.batch_size = batch_size self.epochs = epochs @@ -1236,13 +1240,15 @@ def __init__( self, input_dim, output_dim=2, - layers=[20, 10, 5], + layers: list[int] | None = None, dropout=0.5, ): """ built a simple feed forward network for FDR estimation """ + if layers is None: + layers = [20, 10, 5] super().__init__() self.input_dim = input_dim self.output_dim = output_dim diff --git a/alphadia/fdrx/stats.py b/alphadia/fdrx/stats.py index 76e2491f..f8d97151 100644 --- a/alphadia/fdrx/stats.py +++ b/alphadia/fdrx/stats.py @@ -135,7 +135,7 @@ def fdr_to_q_values(fdr_values: np.ndarray): def keep_best( df: pd.DataFrame, score_column: str = "decoy_proba", - group_columns: list[str] = ["channel", "mod_seq_charge_hash"], + group_columns: list[str] | None = None, ): """Keep the best PSM for each group of PSMs with the same precursor_idx. This function is used to select the best candidate PSM for each precursor. @@ -159,6 +159,8 @@ def keep_best( pd.DataFrame The dataframe containing the best PSM for each group. """ + if group_columns is None: + group_columns = ["channel", "mod_seq_charge_hash"] df = df.reset_index(drop=True) df = df.sort_values(score_column, ascending=True) df = df.groupby(group_columns).head(1) diff --git a/alphadia/libtransform.py b/alphadia/libtransform.py index 1634ea6d..64390db8 100644 --- a/alphadia/libtransform.py +++ b/alphadia/libtransform.py @@ -85,7 +85,7 @@ def __call__(self, input: typing.Any) -> typing.Any: class DynamicLoader(ProcessingStep): - def __init__(self, modification_mapping={}) -> None: + def __init__(self, modification_mapping: dict | None = None) -> None: """Load a spectral library from a file. The file type is dynamically inferred from the file ending. Expects a `str` as input and will return a `SpecLibBase` object. @@ -98,6 +98,8 @@ def __init__(self, modification_mapping={}) -> None: The classical spectral library format as returned by MSFragger. 
It will be imported and converted to a `SpecLibBase` format. This might require additional parsing information. """ + if modification_mapping is None: + modification_mapping = {} self.modification_mapping = modification_mapping def validate(self, input: str) -> bool: @@ -137,20 +139,27 @@ class FastaDigest(ProcessingStep): def __init__( self, enzyme: str = "trypsin", - fixed_modifications: list[str] = ["Carbamidomethyl@C"], - variable_modifications: list[str] = [ - "Oxidation@M", - "Acetyl@Prot N-term", - ], + fixed_modifications: list[str] | None = None, + variable_modifications: list[str] | None = None, missed_cleavages: int = 1, - precursor_len: list[int] = [7, 35], - precursor_charge: list[int] = [2, 4], - precursor_mz: list[int] = [400, 1200], + precursor_len: list[int] | None = None, + precursor_charge: list[int] | None = None, + precursor_mz: list[int] | None = None, max_var_mod_num: int = 1, ) -> None: """Digest a FASTA file into a spectral library. Expects a `List[str]` object as input and will return a `SpecLibBase` object. """ + if precursor_mz is None: + precursor_mz = [400, 1200] + if precursor_charge is None: + precursor_charge = [2, 4] + if precursor_len is None: + precursor_len = [7, 35] + if variable_modifications is None: + variable_modifications = ["Oxidation@M", "Acetyl@Prot N-term"] + if fixed_modifications is None: + fixed_modifications = ["Carbamidomethyl@C"] super().__init__() self.enzyme = enzyme self.fixed_modifications = fixed_modifications @@ -242,11 +251,11 @@ def __init__( self, use_gpu: bool = True, mp_process_num: int = 8, - fragment_mz: list[int] = [100, 2000], + fragment_mz: list[int] | None = None, nce: int = 25, instrument: str = "Lumos", checkpoint_folder_path: str | None = None, - fragment_types: list[str] = ["b", "y"], + fragment_types: list[str] | None = None, max_fragment_charge: int = 2, ) -> None: """Predict the retention time of a spectral library using PeptDeep. 
@@ -278,6 +287,10 @@ def __init__( max_fragment_charge : int, optional Maximum charge state to predict. Default is 2. """ + if fragment_types is None: + fragment_types = ["b", "y"] + if fragment_mz is None: + fragment_mz = [100, 2000] super().__init__() self.use_gpu = use_gpu self.fragment_mz = fragment_mz diff --git a/alphadia/outputaccumulator.py b/alphadia/outputaccumulator.py index 02ae6950..1d40f706 100644 --- a/alphadia/outputaccumulator.py +++ b/alphadia/outputaccumulator.py @@ -80,27 +80,7 @@ def _calculate_fragment_position(self): def parse_output_folder( self, folder: str, - selected_precursor_columns: list[str] = [ - "precursor_idx", - "sequence", - "flat_frag_start_idx", - "flat_frag_stop_idx", - "charge", - "rt_library", - "rt_observed", - "rt_calibrated", - "mobility_library", - "mobility_observed", - "mz_library", - "mz_observed", - "mz_calibrated", - "proteins", - "genes", - "mods", - "mod_sites", - "proba", - "decoy", - ], + selected_precursor_columns: list[str] | None = None, ) -> tuple[pd.DataFrame, pd.DataFrame]: """ Parse the output folder to get a precursor and fragment dataframe in the flat format. 
@@ -121,6 +101,28 @@ def parse_output_folder( """ + if selected_precursor_columns is None: + selected_precursor_columns = [ + "precursor_idx", + "sequence", + "flat_frag_start_idx", + "flat_frag_stop_idx", + "charge", + "rt_library", + "rt_observed", + "rt_calibrated", + "mobility_library", + "mobility_observed", + "mz_library", + "mz_observed", + "mz_calibrated", + "proteins", + "genes", + "mods", + "mod_sites", + "proba", + "decoy", + ] psm_df = pd.read_parquet(os.path.join(folder, "psm.parquet")) frag_df = pd.read_parquet(os.path.join(folder, "frag.parquet")) diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py index 0ca8d23b..2a78e745 100644 --- a/alphadia/outputtransform.py +++ b/alphadia/outputtransform.py @@ -829,7 +829,9 @@ def build_library( return mbr_spec_lib -def _build_run_stat_df(raw_name: str, run_df: pd.DataFrame, channels: list[int] = [0]): +def _build_run_stat_df( + raw_name: str, run_df: pd.DataFrame, channels: list[int] | None = None +): """Build stat dataframe for a single run. 
Parameters @@ -841,8 +843,8 @@ def _build_run_stat_df(raw_name: str, run_df: pd.DataFrame, channels: list[int] run_df: pd.DataFrame Dataframe containing the precursor data - channels: List[int] - List of channels to include in the output + channels: List[int], optional + List of channels to include in the output, default=[0] Returns ------- @@ -851,6 +853,8 @@ def _build_run_stat_df(raw_name: str, run_df: pd.DataFrame, channels: list[int] """ + if channels is None: + channels = [0] out_df = [] for channel in channels: diff --git a/alphadia/planning.py b/alphadia/planning.py index cc1e8e15..dc7fd485 100644 --- a/alphadia/planning.py +++ b/alphadia/planning.py @@ -50,10 +50,10 @@ class Plan: def __init__( self, output_folder: str, - raw_path_list: list[str] = [], + raw_path_list: list[str] | None = None, library_path: str | None = None, - fasta_path_list: list[str] = [], - config: dict | None = {}, + fasta_path_list: list[str] | None = None, + config: dict | None = None, config_base_path: str | None = None, ) -> None: """Highest level class to plan a DIA Search. @@ -75,6 +75,12 @@ def __init__( dict to update the default config. Can be used for debugging purposes etc. 
""" + if config is None: + config = {} + if fasta_path_list is None: + fasta_path_list = [] + if raw_path_list is None: + raw_path_list = [] self.output_folder = output_folder reporting.init_logging(self.output_folder) @@ -288,10 +294,12 @@ def run( self, figure_path=None, neptune_token=None, - neptune_tags=[], + neptune_tags=None, keep_decoys=False, fdr=0.01, ): + if neptune_tags is None: + neptune_tags = [] logger.progress("Starting Search Workflows") workflow_folder_list = [] diff --git a/alphadia/plexscoring.py b/alphadia/plexscoring.py index acde8e18..2521f746 100644 --- a/alphadia/plexscoring.py +++ b/alphadia/plexscoring.py @@ -32,7 +32,7 @@ def candidate_features_to_candidates( candidate_features_df: pd.DataFrame, - optional_columns: list[str] = ["proba"], + optional_columns: list[str] | None = None, ): """create candidates_df from candidate_features_df @@ -50,6 +50,8 @@ def candidate_features_to_candidates( """ # validate candidate_features_df input + if optional_columns is None: + optional_columns = ["proba"] validate.candidate_features_df(candidate_features_df.copy()) required_columns = [ @@ -76,7 +78,7 @@ def multiplex_candidates( candidates_df: pd.DataFrame, precursors_flat_df: pd.DataFrame, remove_decoys: bool = True, - channels: list[int] = [0, 4, 8, 12], + channels: list[int] | None = None, ): """Takes a candidates dataframe and a precursors dataframe and returns a multiplexed candidates dataframe. All original candidates will be retained. For missing candidates, the best scoring candidate in the elution group will be used and multiplexed across all missing channels. 
@@ -103,7 +105,8 @@ def multiplex_candidates( Multiplexed candidates dataframe """ - + if channels is None: + channels = [0, 4, 8, 12] precursors_flat_view = precursors_flat_df.copy() best_candidate_view = candidates_df.copy() diff --git a/alphadia/transferlearning/train.py b/alphadia/transferlearning/train.py index d4f37686..30cbf07d 100644 --- a/alphadia/transferlearning/train.py +++ b/alphadia/transferlearning/train.py @@ -215,8 +215,13 @@ class FinetuneManager(ModelManager): """ def __init__( - self, mask_modloss: bool = False, device: str = "gpu", settings: dict = {} + self, + mask_modloss: bool = False, + device: str = "gpu", + settings: dict | None = None, ): + if settings is None: + settings = {} super().__init__(mask_modloss, device) self.device = device self.settings = settings diff --git a/alphadia/workflow/config.py b/alphadia/workflow/config.py index 5d5fc968..df3de56d 100644 --- a/alphadia/workflow/config.py +++ b/alphadia/workflow/config.py @@ -48,7 +48,9 @@ def get_tree_structure(last_item_arr: list[bool], update=False): return tree_structure -def print_w_style(string: str, style: str = "auto", last_item_arr=[False]) -> None: +def print_w_style( + string: str, style: str = "auto", last_item_arr: list[bool] | None = None +) -> None: """ Print string with tree structure and uses ANSI color codes to color the string base on the style: - update: green color @@ -64,13 +66,12 @@ def print_w_style(string: str, style: str = "auto", last_item_arr=[False]) -> No style : str Style of the string - level : int, optional - Level of the string, by default - - last_item : bool, optional - If the string is the last item in the list or dict, by default False + last_item_arr : list[bool], optional + If the string is the last item in the list or dict, by default [False] """ + if last_item_arr is None: + last_item_arr = [False] if style == "auto": # Check what the config name in string inside the brackets ( ) # If the source is default, remove the brackets and set style 
to default @@ -109,7 +110,7 @@ def print_recursively( level: int = 0, style: str = "auto", last_item: bool = False, - last_item_arr=[], + last_item_arr: list | None = None, ) -> None: """ Recursively print any config with tree structure and uses ANSI color codes to color the string based on the style. @@ -128,8 +129,11 @@ def print_recursively( last_item : bool, optional If the config is the last item in the list or dict, by default False. + last_item_arr : list, optional, passed on to `print_w_style`; by default None, which is treated as an empty list """ + if last_item_arr is None: + last_item_arr = [] if isinstance(config, tuple): print_w_style( f"{config[0]} ({config[1]})", style=style, last_item_arr=last_item_arr @@ -258,7 +262,7 @@ def update_recursive( level: int = 0, print_output: bool = True, is_leaf_node: bool = False, - last_item_arr=[], + last_item_arr: list | None = None, ) -> dict[str, Any] | list[Any]: """ Recursively update the default config with the experiments config @@ -281,7 +285,11 @@ def update_recursive( is_leaf_node : bool, optional Whether the config is a leaf node or not, by default False This is used to determine the style of the config only does not affect the update process + + last_item_arr : list, optional, by default None, which is treated as an empty list """ + if last_item_arr is None: + last_item_arr = [] parent_key = config["key"] default_config = config["value"] # If the default config is a leaf node, then we can update it diff --git a/alphadia/workflow/manager.py b/alphadia/workflow/manager.py index d1ab85e1..fe3fa199 100644 --- a/alphadia/workflow/manager.py +++ b/alphadia/workflow/manager.py @@ -341,7 +341,14 @@ def get_estimator(self, group_name: str, estimator_name: str): ) return None - def fit(self, df: pd.DataFrame, group_name: str, skip=[], *args, **kwargs): + def fit( + self, + df: pd.DataFrame, + group_name: str, + skip: list | None = None, + *args, + **kwargs, + ): """Fit all estimators in a calibration group.
Parameters @@ -353,8 +360,12 @@ def fit(self, df: pd.DataFrame, group_name: str, skip=[], *args, **kwargs): group_name : str Name of the calibration group + skip : list, optional, by default None, which is treated as an empty list + """ + if skip is None: + skip = [] if len(self.estimator_groups) == 0: raise ValueError("No estimators defined") diff --git a/alphadia/workflow/peptidecentric.py b/alphadia/workflow/peptidecentric.py index 7df8f13f..a9c6f7b9 100644 --- a/alphadia/workflow/peptidecentric.py +++ b/alphadia/workflow/peptidecentric.py @@ -1096,7 +1096,7 @@ def requantify_fragments( def _build_candidate_speclib_flat( psm_df: pd.DataFrame, - fragment_types: list[str] = ["b", "y"], + fragment_types: list[str] | None = None, max_charge: int = 2, optional_columns: list[str] | None = None, ) -> tuple[SpecLibFlat, pd.DataFrame]: @@ -1143,6 +1143,8 @@ def _build_candidate_speclib_flat( """ # set default optional columns + if fragment_types is None: + fragment_types = ["b", "y"] if optional_columns is None: optional_columns = [ "proba", diff --git a/alphadia/workflow/reporting.py b/alphadia/workflow/reporting.py index 67e5e5c8..1f53ae75 100644 --- a/alphadia/workflow/reporting.py +++ b/alphadia/workflow/reporting.py @@ -187,7 +187,7 @@ def log_event(self, name: str, value: typing.Any, *args, **kwargs): class FigureBackend(Backend): FIGURE_PATH = "figures" - def __init__(self, path=None, default_savefig_kwargs={"dpi": 300}) -> None: + def __init__(self, path=None, default_savefig_kwargs=None) -> None: """Backend which logs figures to a folder. implements the `log_figure` method.
@@ -203,6 +203,8 @@ def __init__(self, path=None, default_savefig_kwargs={"dpi": 300}) -> None: Default arguments to pass to matplotlib.figure.Figure.savefig """ + if default_savefig_kwargs is None: + default_savefig_kwargs = {"dpi": 300} self.path = path if self.path is None: @@ -256,7 +258,7 @@ def __init__( self, path=None, enable_figure=True, - default_savefig_kwargs={"dpi": 300}, + default_savefig_kwargs=None, ) -> None: """Backend which logs metrics, plots and strings to a JSONL file. It implements `log_figure`, `log_metric` , `log_string` and `log_event` methods. @@ -277,6 +279,8 @@ def __init__( """ + if default_savefig_kwargs is None: + default_savefig_kwargs = {"dpi": 300} self.path = path if self.path is None: @@ -545,7 +549,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback): class Pipeline: def __init__( self, - backends: list[type[Backend]] = [], + backends: list[type[Backend]] = None, ): """Metric logger which allows to log metrics, plots and strings to multiple backends. @@ -558,6 +562,8 @@ def __init__( # the context will store a Context object # this allows backends which require a context to be used + if backends is None: + backends = [] self.context = Context(self) # instantiate backends diff --git a/pyproject.toml b/pyproject.toml index 12c760be..ede85129 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,6 +87,5 @@ select = [ ignore = [ "E501", # Line too long (ruff wraps code, but not docstrings) "B028", # No explicit `stacklevel` keyword argument found (for warnings) - "B006", # B006 Do not use mutable data structures for argument defaults "B905" # TODO revisit: `zip()` without an explicit `strict=` parameter ]