MannLabs · mschwoer · Jun 19, 2024 · Jun 19, 2024
diff --git a/alphadia/calibration/property.py b/alphadia/calibration/property.py
@@ -22,9 +22,9 @@ def __init__(
         self,
         name: str = "",
         function: object = None,
-        input_columns: list[str] = [],
-        target_columns: list[str] = [],
-        output_columns: list[str] = [],
+        input_columns: list[str] | None = None,
+        target_columns: list[str] | None = None,
+        output_columns: list[str] | None = None,
         transform_deviation: None | float = None,
         **kwargs,
     ):
@@ -59,7 +59,12 @@ def __init__(
             If set to None, the deviation is expressed in absolute units.
 
         """
-
+        if output_columns is None:
+            output_columns = []
+        if target_columns is None:
+            target_columns = []
+        if input_columns is None:
+            input_columns = []
         self.name = name
         self.function = function
         self.input_columns = input_columns

diff --git a/alphadia/fdr.py b/alphadia/fdr.py
@@ -172,7 +172,7 @@ def perform_fdr(
 def keep_best(
     df: pd.DataFrame,
     score_column: str = "proba",
-    group_columns: list[str] = ["channel", "precursor_idx"],
+    group_columns: list[str] | None = None,
 ):
     """Keep the best PSM for each group of PSMs with the same precursor_idx.
     This function is used to select the best candidate PSM for each precursor.
@@ -196,6 +196,8 @@ def keep_best(
     pd.DataFrame
         The dataframe containing the best PSM for each group.
     """
+    if group_columns is None:
+        group_columns = ["channel", "precursor_idx"]
     temp_df = df.reset_index(drop=True)
     temp_df = temp_df.sort_values(score_column, ascending=True)
     temp_df = temp_df.groupby(group_columns).head(1)

diff --git a/alphadia/fdrexperimental.py b/alphadia/fdrexperimental.py
@@ -127,7 +127,7 @@ def __init__(
         epochs: int = 10,
         learning_rate: float = 0.0002,
         weight_decay: float = 0.00001,
-        layers: list[int] = [100, 50, 20, 5],
+        layers: list[int] | None = None,
         dropout: float = 0.001,
         calculate_metrics: bool = True,
         metric_interval: int = 1,
@@ -186,6 +186,8 @@ def __init__(
             Whether to use GPU acceleration if available.
         """
 
+        if layers is None:
+            layers = [100, 50, 20, 5]
         self.test_size = test_size
         self.max_batch_size = max_batch_size
         self.min_batch_number = min_batch_number
@@ -605,7 +607,7 @@ def __init__(
         epochs: int = 10,
         learning_rate: float = 0.0002,
         weight_decay: float = 0.00001,
-        layers: list[int] = [100, 50, 20, 5],
+        layers: list[int] | None = None,
         dropout: float = 0.001,
         metric_interval: int = 1000,
         **kwargs,
@@ -646,7 +648,8 @@ def __init__(
             Interval for logging metrics during training.
 
         """
-
+        if layers is None:
+            layers = [100, 50, 20, 5]
         self.test_size = test_size
         self.batch_size = batch_size
         self.epochs = epochs
@@ -919,7 +922,7 @@ def __init__(
         epochs: int = 10,
         learning_rate: float = 0.0002,
         weight_decay: float = 0.00001,
-        layers: list[int] = [100, 50, 20, 5],
+        layers: list[int] | None = None,
         dropout: float = 0.001,
         metric_interval: int = 1000,
         **kwargs,
@@ -960,7 +963,8 @@ def __init__(
             Interval for logging metrics during training.
 
         """
-
+        if layers is None:
+            layers = [100, 50, 20, 5]
         self.test_size = test_size
         self.batch_size = batch_size
         self.epochs = epochs
@@ -1236,13 +1240,15 @@ def __init__(
         self,
         input_dim,
         output_dim=2,
-        layers=[20, 10, 5],
+        layers: list[int] | None = None,
         dropout=0.5,
     ):
         """
-        built a simple feed forward network for FDR estimation
+        Build a simple feed-forward network for FDR estimation.
 
         """
+        if layers is None:
+            layers = [20, 10, 5]
         super().__init__()
         self.input_dim = input_dim
         self.output_dim = output_dim

diff --git a/alphadia/fdrx/stats.py b/alphadia/fdrx/stats.py
@@ -135,7 +135,7 @@ def fdr_to_q_values(fdr_values: np.ndarray):
 def keep_best(
     df: pd.DataFrame,
     score_column: str = "decoy_proba",
-    group_columns: list[str] = ["channel", "mod_seq_charge_hash"],
+    group_columns: list[str] | None = None,
 ):
-    """Keep the best PSM for each group of PSMs with the same precursor_idx.
+    """Keep the best PSM within each group defined by `group_columns`.
     This function is used to select the best candidate PSM for each precursor.
@@ -159,6 +159,8 @@ def keep_best(
     pd.DataFrame
         The dataframe containing the best PSM for each group.
     """
+    if group_columns is None:
+        group_columns = ["channel", "mod_seq_charge_hash"]
     df = df.reset_index(drop=True)
     df = df.sort_values(score_column, ascending=True)
     df = df.groupby(group_columns).head(1)

diff --git a/alphadia/libtransform.py b/alphadia/libtransform.py
@@ -85,7 +85,7 @@ def __call__(self, input: typing.Any) -> typing.Any:
 
 
 class DynamicLoader(ProcessingStep):
-    def __init__(self, modification_mapping={}) -> None:
+    def __init__(self, modification_mapping: dict | None = None) -> None:
         """Load a spectral library from a file. The file type is dynamically inferred from the file ending.
         Expects a `str` as input and will return a `SpecLibBase` object.
 
@@ -98,6 +98,8 @@ def __init__(self, modification_mapping={}) -> None:
         The classical spectral library format as returned by MSFragger.
         It will be imported and converted to a `SpecLibBase` format. This might require additional parsing information.
         """
+        if modification_mapping is None:
+            modification_mapping = {}
         self.modification_mapping = modification_mapping
 
     def validate(self, input: str) -> bool:
@@ -137,20 +139,27 @@ class FastaDigest(ProcessingStep):
     def __init__(
         self,
         enzyme: str = "trypsin",
-        fixed_modifications: list[str] = ["Carbamidomethyl@C"],
-        variable_modifications: list[str] = [
-            "Oxidation@M",
-            "Acetyl@Prot N-term",
-        ],
+        fixed_modifications: list[str] | None = None,
+        variable_modifications: list[str] | None = None,
         missed_cleavages: int = 1,
-        precursor_len: list[int] = [7, 35],
-        precursor_charge: list[int] = [2, 4],
-        precursor_mz: list[int] = [400, 1200],
+        precursor_len: list[int] | None = None,
+        precursor_charge: list[int] | None = None,
+        precursor_mz: list[int] | None = None,
         max_var_mod_num: int = 1,
     ) -> None:
         """Digest a FASTA file into a spectral library.
         Expects a `List[str]` object as input and will return a `SpecLibBase` object.
         """
+        if precursor_mz is None:
+            precursor_mz = [400, 1200]
+        if precursor_charge is None:
+            precursor_charge = [2, 4]
+        if precursor_len is None:
+            precursor_len = [7, 35]
+        if variable_modifications is None:
+            variable_modifications = ["Oxidation@M", "Acetyl@Prot N-term"]
+        if fixed_modifications is None:
+            fixed_modifications = ["Carbamidomethyl@C"]
         super().__init__()
         self.enzyme = enzyme
         self.fixed_modifications = fixed_modifications
@@ -242,11 +251,11 @@ def __init__(
         self,
         use_gpu: bool = True,
         mp_process_num: int = 8,
-        fragment_mz: list[int] = [100, 2000],
+        fragment_mz: list[int] | None = None,
         nce: int = 25,
         instrument: str = "Lumos",
         checkpoint_folder_path: str | None = None,
-        fragment_types: list[str] = ["b", "y"],
+        fragment_types: list[str] | None = None,
         max_fragment_charge: int = 2,
     ) -> None:
         """Predict the retention time of a spectral library using PeptDeep.
@@ -278,6 +287,10 @@ def __init__(
         max_fragment_charge : int, optional
             Maximum charge state to predict. Default is 2.
         """
+        if fragment_types is None:
+            fragment_types = ["b", "y"]
+        if fragment_mz is None:
+            fragment_mz = [100, 2000]
         super().__init__()
         self.use_gpu = use_gpu
         self.fragment_mz = fragment_mz

diff --git a/alphadia/outputaccumulator.py b/alphadia/outputaccumulator.py
@@ -80,27 +80,7 @@ def _calculate_fragment_position(self):
     def parse_output_folder(
         self,
         folder: str,
-        selected_precursor_columns: list[str] = [
-            "precursor_idx",
-            "sequence",
-            "flat_frag_start_idx",
-            "flat_frag_stop_idx",
-            "charge",
-            "rt_library",
-            "rt_observed",
-            "rt_calibrated",
-            "mobility_library",
-            "mobility_observed",
-            "mz_library",
-            "mz_observed",
-            "mz_calibrated",
-            "proteins",
-            "genes",
-            "mods",
-            "mod_sites",
-            "proba",
-            "decoy",
-        ],
+        selected_precursor_columns: list[str] | None = None,
     ) -> tuple[pd.DataFrame, pd.DataFrame]:
         """
         Parse the output folder to get a precursor and fragment dataframe in the flat format.
@@ -121,6 +101,28 @@ def parse_output_folder(
 
 
         """
+        if selected_precursor_columns is None:
+            selected_precursor_columns = [
+                "precursor_idx",
+                "sequence",
+                "flat_frag_start_idx",
+                "flat_frag_stop_idx",
+                "charge",
+                "rt_library",
+                "rt_observed",
+                "rt_calibrated",
+                "mobility_library",
+                "mobility_observed",
+                "mz_library",
+                "mz_observed",
+                "mz_calibrated",
+                "proteins",
+                "genes",
+                "mods",
+                "mod_sites",
+                "proba",
+                "decoy",
+            ]
         psm_df = pd.read_parquet(os.path.join(folder, "psm.parquet"))
         frag_df = pd.read_parquet(os.path.join(folder, "frag.parquet"))
 

diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py
@@ -829,7 +829,9 @@ def build_library(
         return mbr_spec_lib
 
 
-def _build_run_stat_df(raw_name: str, run_df: pd.DataFrame, channels: list[int] = [0]):
+def _build_run_stat_df(
+    raw_name: str, run_df: pd.DataFrame, channels: list[int] | None = None
+):
     """Build stat dataframe for a single run.
 
     Parameters
@@ -841,8 +843,8 @@ def _build_run_stat_df(raw_name: str, run_df: pd.DataFrame, channels: list[int]
     run_df: pd.DataFrame
         Dataframe containing the precursor data
 
-    channels: List[int]
-        List of channels to include in the output
+    channels: list[int] | None, optional
+        List of channels to include in the output. If None, defaults to [0].
 
     Returns
     -------
@@ -851,6 +853,8 @@ def _build_run_stat_df(raw_name: str, run_df: pd.DataFrame, channels: list[int]
 
     """
 
+    if channels is None:
+        channels = [0]
     out_df = []
 
     for channel in channels:

diff --git a/alphadia/planning.py b/alphadia/planning.py
@@ -50,10 +50,10 @@ class Plan:
     def __init__(
         self,
         output_folder: str,
-        raw_path_list: list[str] = [],
+        raw_path_list: list[str] | None = None,
         library_path: str | None = None,
-        fasta_path_list: list[str] = [],
-        config: dict | None = {},
+        fasta_path_list: list[str] | None = None,
+        config: dict | None = None,
         config_base_path: str | None = None,
     ) -> None:
         """Highest level class to plan a DIA Search.
@@ -75,6 +75,12 @@ def __init__(
             dict to update the default config. Can be used for debugging purposes etc.
 
         """
+        if config is None:
+            config = {}
+        if fasta_path_list is None:
+            fasta_path_list = []
+        if raw_path_list is None:
+            raw_path_list = []
         self.output_folder = output_folder
         reporting.init_logging(self.output_folder)
 
@@ -288,10 +294,12 @@ def run(
         self,
         figure_path=None,
         neptune_token=None,
-        neptune_tags=[],
+        neptune_tags=None,
         keep_decoys=False,
         fdr=0.01,
     ):
+        if neptune_tags is None:
+            neptune_tags = []
         logger.progress("Starting Search Workflows")
 
         workflow_folder_list = []

diff --git a/alphadia/plexscoring.py b/alphadia/plexscoring.py
@@ -32,7 +32,7 @@
 
 def candidate_features_to_candidates(
     candidate_features_df: pd.DataFrame,
-    optional_columns: list[str] = ["proba"],
+    optional_columns: list[str] | None = None,
 ):
     """create candidates_df from candidate_features_df
 
@@ -50,6 +50,8 @@ def candidate_features_to_candidates(
     """
 
+    if optional_columns is None:
+        optional_columns = ["proba"]
     # validate candidate_features_df input
     validate.candidate_features_df(candidate_features_df.copy())
 
     required_columns = [
@@ -76,7 +78,7 @@ def multiplex_candidates(
     candidates_df: pd.DataFrame,
     precursors_flat_df: pd.DataFrame,
     remove_decoys: bool = True,
-    channels: list[int] = [0, 4, 8, 12],
+    channels: list[int] | None = None,
 ):
     """Takes a candidates dataframe and a precursors dataframe and returns a multiplexed candidates dataframe.
     All original candidates will be retained. For missing candidates, the best scoring candidate in the elution group will be used and multiplexed across all missing channels.
@@ -103,7 +105,8 @@ def multiplex_candidates(
         Multiplexed candidates dataframe
 
     """
-
+    if channels is None:
+        channels = [0, 4, 8, 12]
     precursors_flat_view = precursors_flat_df.copy()
     best_candidate_view = candidates_df.copy()
 

diff --git a/alphadia/transferlearning/train.py b/alphadia/transferlearning/train.py
@@ -215,8 +215,13 @@ class FinetuneManager(ModelManager):
     """
 
     def __init__(
-        self, mask_modloss: bool = False, device: str = "gpu", settings: dict = {}
+        self,
+        mask_modloss: bool = False,
+        device: str = "gpu",
+        settings: dict | None = None,
     ):
+        if settings is None:
+            settings = {}
         super().__init__(mask_modloss, device)
         self.device = device
         self.settings = settings