MannLabs · mschwoer · Jun 19, 2024 · Jun 19, 2024 · Jun 19, 2024 · Jun 19, 2024
diff --git a/alphadia/calibration/models.py b/alphadia/calibration/models.py
@@ -145,11 +145,10 @@ def fit(self, x: np.ndarray, y: np.ndarray):
         # === start === sanity checks ===
         # Does not yet work with more than one input dimension
         # axis-wise scaling and improved distance function need to be implemented
-        if len(x.shape) > 1:
-            if x.shape[1] > 1:
-                raise ValueError(
-                    "Input arrays with more than one feature not yet supported. Please provide a matrix of shape (n_datapoints, 1) or (n_datapoints,)"
-                )
+        if len(x.shape) > 1 and x.shape[1] > 1:
+            raise ValueError(
+                "Input arrays with more than one feature not yet supported. Please provide a matrix of shape (n_datapoints, 1) or (n_datapoints,)"
+            )
 
         # at least two datapoints required
         if len(x.flat) < 2:

diff --git a/alphadia/cli.py b/alphadia/cli.py
@@ -190,13 +190,13 @@ def parse_raw_path_list(args: argparse.Namespace, config: dict) -> list:
     raw_path_list : list
         List of raw files.
     """
-    config_raw_path_list = config["raw_path_list"] if "raw_path_list" in config else []
+    config_raw_path_list = config.get("raw_path_list", [])
     raw_path_list = (
         utils.windows_to_wsl(config_raw_path_list) if args.wsl else config_raw_path_list
     )
     raw_path_list += utils.windows_to_wsl(args.file) if args.wsl else args.file
 
-    config_directory = config["directory"] if "directory" in config else None
+    config_directory = config.get("directory")
     directory = utils.windows_to_wsl(config_directory) if args.wsl else config_directory
     if directory is not None:
         raw_path_list += [os.path.join(directory, f) for f in os.listdir(directory)]
@@ -274,7 +274,7 @@ def parse_fasta(args: argparse.Namespace, config: dict) -> list:
         List of fasta files.
     """
 
-    config_fasta_path_list = config["fasta_list"] if "fasta_list" in config else []
+    config_fasta_path_list = config.get("fasta_list", [])
     fasta_path_list = (
         utils.windows_to_wsl(config_fasta_path_list)
         if args.wsl

diff --git a/alphadia/data/alpharaw.py b/alphadia/data/alpharaw.py
@@ -383,15 +383,14 @@ def filter_spectra(self, cv: float = None, astral_ms1: bool = False, **kwargs):
             ]
 
         # filter for cv values if multiple cv values are present
-        if cv is not None:
-            if "cv" in self.spectrum_df.columns:
-                # use np.isclose to account for floating point errors
-                logger.info(f"Filtering for CV {cv}")
-                logger.info(f"Before: {len(self.spectrum_df)}")
-                self.spectrum_df = self.spectrum_df[
-                    np.isclose(self.spectrum_df["cv"], cv, atol=0.1)
-                ]
-                logger.info(f"After: {len(self.spectrum_df)}")
+        if cv is not None and "cv" in self.spectrum_df.columns:
+            # use np.isclose to account for floating point errors
+            logger.info(f"Filtering for CV {cv}")
+            logger.info(f"Before: {len(self.spectrum_df)}")
+            self.spectrum_df = self.spectrum_df[
+                np.isclose(self.spectrum_df["cv"], cv, atol=0.1)
+            ]
+            logger.info(f"After: {len(self.spectrum_df)}")
 
         self.spectrum_df["spec_idx"] = np.arange(len(self.spectrum_df))
 

diff --git a/alphadia/fdr.py b/alphadia/fdr.py
@@ -134,24 +134,23 @@ def perform_fdr(
 
     psm_df = get_q_values(psm_df, "proba", "_decoy")
 
-    if dia_cycle is not None:
-        if dia_cycle.shape[2] <= 2:
-            # use a FDR of 10% as starting point
-            # if there are no PSMs with a FDR < 10% use all PSMs
-            start_idx = psm_df["qval"].searchsorted(fdr_heuristic, side="left")
-            if start_idx == 0:
-                start_idx = len(psm_df)
-
-            # make sure fragments are not reused
-            if df_fragments is not None:
-                if dia_cycle is None:
-                    raise ValueError(
-                        "dia_cycle must be provided if reuse_fragments is False"
-                    )
-                fragment_competition = fragcomp.FragmentCompetition()
-                psm_df = fragment_competition(
-                    psm_df.iloc[:start_idx], df_fragments, dia_cycle
+    if dia_cycle is not None and dia_cycle.shape[2] <= 2:
+        # use a FDR of 10% as starting point
+        # if there are no PSMs with a FDR < 10% use all PSMs
+        start_idx = psm_df["qval"].searchsorted(fdr_heuristic, side="left")
+        if start_idx == 0:
+            start_idx = len(psm_df)
+
+        # make sure fragments are not reused
+        if df_fragments is not None:
+            if dia_cycle is None:
+                raise ValueError(
+                    "dia_cycle must be provided if reuse_fragments is False"
                 )
+            fragment_competition = fragcomp.FragmentCompetition()
+            psm_df = fragment_competition(
+                psm_df.iloc[:start_idx], df_fragments, dia_cycle
+            )
 
     psm_df = keep_best(psm_df, group_columns=group_columns)
     psm_df = get_q_values(psm_df, "proba", "_decoy")

diff --git a/alphadia/features.py b/alphadia/features.py
@@ -557,14 +557,13 @@ def build_features(
     features["mean_observation_score"] = 0
     features["var_observation_score"] = 1
 
-    if np.sum(peak_fragment_mask_1d) > 0:
-        if n_observations > 1:
-            observation_score = cosine_similarity_a1(
-                total_template_intensity,
-                observed_fragment_intensity[peak_fragment_mask_1d],
-            ).astype(np.float32)
-            features["mean_observation_score"] = np.mean(observation_score)
-            features["var_observation_score"] = np.var(observation_score)
+    if np.sum(peak_fragment_mask_1d) > 0 and n_observations > 1:
+        observation_score = cosine_similarity_a1(
+            total_template_intensity,
+            observed_fragment_intensity[peak_fragment_mask_1d],
+        ).astype(np.float32)
+        features["mean_observation_score"] = np.mean(observation_score)
+        features["var_observation_score"] = np.var(observation_score)
 
     fragment_features["mz_library"] = fragments.mz_library[fragment_mask_1d]
     fragment_features["mz_observed"] = observed_fragment_mz_mean[
@@ -1336,14 +1335,16 @@ def rank_features(current_candidate_idx, candidate_list):
             count = 0
 
             for i_candidate in range(len(candidate_list)):
-                if i_candidate != current_candidate_idx:
-                    if feature in candidate_list[i_candidate].features:
-                        if (
-                            candidate_list[i_candidate].features[feature]
-                            < candidate_list[current_candidate_idx].features[feature]
-                        ):
-                            rank += 1
-                        count += 1
+                if (
+                    i_candidate != current_candidate_idx
+                    and feature in candidate_list[i_candidate].features
+                ):
+                    if (
+                        candidate_list[i_candidate].features[feature]
+                        < candidate_list[current_candidate_idx].features[feature]
+                    ):
+                        rank += 1
+                    count += 1
 
         if count > 0:
             feature_dict[feature + "_rank"] = rank / count
@@ -1356,14 +1357,16 @@ def rank_features(current_candidate_idx, candidate_list):
             count = 0
 
             for i_candidate in range(len(candidate_list)):
-                if i_candidate != current_candidate_idx:
-                    if feature in candidate_list[i_candidate].features:
-                        if (
-                            candidate_list[i_candidate].features[feature]
-                            > candidate_list[current_candidate_idx].features[feature]
-                        ):
-                            rank += 1
-                        count += 1
+                if (
+                    i_candidate != current_candidate_idx
+                    and feature in candidate_list[i_candidate].features
+                ):
+                    if (
+                        candidate_list[i_candidate].features[feature]
+                        > candidate_list[current_candidate_idx].features[feature]
+                    ):
+                        rank += 1
+                    count += 1
 
         if count > 0:
             feature_dict[feature + "_rank"] = rank / count

diff --git a/alphadia/libtransform.py b/alphadia/libtransform.py
@@ -103,7 +103,7 @@ def __init__(self, modification_mapping={}) -> None:
     def validate(self, input: str) -> bool:
         """Validate the input object. It is expected that the input is a path to a file which exists."""
         valid = True
-        valid &= isinstance(input, str) or isinstance(input, Path)
+        valid &= isinstance(input, str | Path)
 
         if not os.path.exists(input):
             logger.error(f"Input path {input} does not exist")
@@ -569,15 +569,14 @@ def forward(self, input: SpecLibBase) -> SpecLibBase:
             )
             return input
 
-        if "rt" not in input.precursor_df.columns:
-            if (
-                "rt_norm" in input.precursor_df.columns
-                or "rt_norm_pred" in input.precursor_df.columns
-            ):
-                logger.warning(
-                    "Input library already contains normalized RT information. Skipping RT normalization"
-                )
-                return input
+        if "rt" not in input.precursor_df.columns and (
+            "rt_norm" in input.precursor_df.columns
+            or "rt_norm_pred" in input.precursor_df.columns
+        ):
+            logger.warning(
+                "Input library already contains normalized RT information. Skipping RT normalization"
+            )
+            return input
 
         percentiles = np.percentile(input.precursor_df["rt"], [0.1, 99.9])
         input._precursor_df["rt"] = np.clip(

diff --git a/alphadia/numba/config.py b/alphadia/numba/config.py
@@ -147,26 +147,30 @@ def __init__(self):
                     )
 
             # check if dtype matches
-            if isinstance(value, np.ndarray):
-                if value.dtype != getattr(self, key).dtype:
-                    try:
-                        value = value.astype(getattr(self, key).dtype)
-                    except Exception:
-                        self.reporter.log_string(
-                            f"Parameter {key} has wrong dtype {value.dtype}",
-                            verbosity="error",
-                        )
-                        continue
-
-            # check if dimensions match
-            if isinstance(value, np.ndarray):
-                if value.shape != getattr(self, key).shape:
+            if (
+                isinstance(value, np.ndarray)
+                and value.dtype != getattr(self, key).dtype
+            ):
+                try:
+                    value = value.astype(getattr(self, key).dtype)
+                except Exception:
                     self.reporter.log_string(
-                        f"Parameter {key} has wrong shape {value.shape}",
+                        f"Parameter {key} has wrong dtype {value.dtype}",
                         verbosity="error",
                     )
                     continue
 
+            # check if dimensions match
+            if (
+                isinstance(value, np.ndarray)
+                and value.shape != getattr(self, key).shape
+            ):
+                self.reporter.log_string(
+                    f"Parameter {key} has wrong shape {value.shape}",
+                    verbosity="error",
+                )
+                continue
+
             # update attribute
             setattr(self, key, value)
 

diff --git a/alphadia/numba/fragments.py b/alphadia/numba/fragments.py
@@ -336,10 +336,7 @@ def get_ion_group_mapping(
     ):
         # score_group_idx = precursor_group[precursor]
 
-        if len(grouped_mz) == 0:
-            grouped_mz.append(mz)
-
-        elif np.abs(grouped_mz[-1] - mz) > EPSILON:
+        if len(grouped_mz) == 0 or np.abs(grouped_mz[-1] - mz) > EPSILON:
             grouped_mz.append(mz)
 
         idx = len(grouped_mz) - 1

diff --git a/alphadia/numba/numeric.py b/alphadia/numba/numeric.py
@@ -87,10 +87,7 @@ def get_mean_sparse0(dense, scan, cycle, threshold):
     mask = mz_window < threshold
     fraction_nonzero = np.mean(mask.astype("int8"))
 
-    if fraction_nonzero > 0:
-        values = np.mean(mz_window[mask])
-    else:
-        values = threshold
+    values = np.mean(mz_window[mask]) if fraction_nonzero > 0 else threshold
 
     return values
 

diff --git a/alphadia/peakgroup/search.py b/alphadia/peakgroup/search.py
@@ -394,11 +394,11 @@ def select_candidates(
         # "Empty dense precursor matrix"
         return
 
-    if not dense_fragments.shape[2] % 2 == 0:
+    if dense_fragments.shape[2] % 2 != 0:
         # "Dense fragment matrix not divisible by 2"
         return
 
-    if not dense_fragments.shape[2] % 2 == 0:
+    if dense_fragments.shape[2] % 2 != 0:
         # "Dense fragment matrix not divisible by 2"
         return
 
@@ -628,20 +628,17 @@ def build_candidates(
 ):
     cycle_length = jit_data.cycle.shape[1]
 
-    if weights is None:
-        feature_weights = np.ones(1)
-    else:
-        feature_weights = weights
+    feature_weights = np.ones(1) if weights is None else weights
 
     feature_weights = feature_weights.reshape(-1, 1, 1)
 
     smooth_precursor = fft.convolve_fourier(dense_precursors, kernel)
     smooth_fragment = fft.convolve_fourier(dense_fragments, kernel)
 
-    if not smooth_precursor.shape == dense_precursors.shape:
+    if smooth_precursor.shape != dense_precursors.shape:
         print(smooth_precursor.shape, dense_precursors.shape)
         print("smooth_precursor shape does not match dense_precursors shape")
-    if not smooth_fragment.shape == dense_fragments.shape:
+    if smooth_fragment.shape != dense_fragments.shape:
         print(smooth_fragment.shape, dense_fragments.shape)
         print("smooth_fragment shape does not match dense_fragments shape")
 

diff --git a/alphadia/plexscoring.py b/alphadia/plexscoring.py
@@ -1110,7 +1110,7 @@ def get_feature_columns(self):
                 if len(candidate.features) not in known_feature_lengths:
                     known_feature_lengths += [len(candidate.features)]
                     # add all new features to the list of known columns
-                    for key in candidate.features.keys():
+                    for key in candidate.features.keys():  # noqa: SIM118
                         if key not in known_columns:
                             known_columns += [key]
         return known_columns

diff --git a/alphadia/utils.py b/alphadia/utils.py
@@ -174,7 +174,7 @@ def recursive_update(full_dict: dict, update_dict: dict):
 
     """
     for key, value in update_dict.items():
-        if key in full_dict.keys():
+        if key in full_dict:
             if isinstance(value, dict):
                 recursive_update(full_dict[key], update_dict[key])
             else:

diff --git a/alphadia/validate.py b/alphadia/validate.py
@@ -68,9 +68,8 @@ def __call__(self, df, logging=True):
             If True, log the validation results
         """
 
-        if self.name in df.columns:
-            if df[self.name].dtype != self.type:
-                df[self.name] = df[self.name].astype(self.type)
+        if self.name in df.columns and df[self.name].dtype != self.type:
+            df[self.name] = df[self.name].astype(self.type)
 
         return True
 

diff --git a/alphadia/workflow/base.py b/alphadia/workflow/base.py
@@ -176,14 +176,7 @@ def _get_dia_data_object(
             shutil.copyfile(dia_data_path, tmp_dia_data_path)
             dia_data_path = tmp_dia_data_path
 
-        if file_extension.lower() == ".d":
-            self.reporter.log_metric("raw_data_type", "bruker")
-            dia_data = bruker.TimsTOFTranspose(
-                dia_data_path,
-                mmap_detector_events=self.config["general"]["mmap_detector_events"],
-            )
-
-        elif file_extension.lower() == ".hdf":
+        if file_extension.lower() == ".d" or file_extension.lower() == ".hdf":
             self.reporter.log_metric("raw_data_type", "bruker")
             dia_data = bruker.TimsTOFTranspose(
                 dia_data_path,
@@ -194,9 +187,11 @@ def _get_dia_data_object(
             self.reporter.log_metric("raw_data_type", "thermo")
             # check if cv selection exists
             cv = None
-            if "raw_data_loading" in self.config:
-                if "cv" in self.config["raw_data_loading"]:
-                    cv = self.config["raw_data_loading"]["cv"]
+            if (
+                "raw_data_loading" in self.config
+                and "cv" in self.config["raw_data_loading"]
+            ):
+                cv = self.config["raw_data_loading"]["cv"]
 
             dia_data = alpharaw.Thermo(
                 dia_data_path,

diff --git a/alphadia/workflow/config.py b/alphadia/workflow/config.py
@@ -156,7 +156,7 @@ def print_recursively(
                 )
                 continue
 
-            elif isinstance(value, list) or isinstance(value, dict):
+            elif isinstance(value, list | dict):
                 print_w_style(
                     f"{key}",
                     style=style,