MannLabs · mschwoer · Jun 19, 2024 · Jun 18, 2024 · Jun 18, 2024 · Jun 18, 2024
diff --git a/alphadia/calibration/models.py b/alphadia/calibration/models.py
@@ -3,8 +3,6 @@
 from sklearn.preprocessing import PolynomialFeatures
 from sklearn.base import BaseEstimator, RegressorMixin
 
-from sklearn.utils.estimator_checks import check_estimator
-
 
 class LOESSRegression(BaseEstimator, RegressorMixin):
     """scikit-learn estimator which implements a LOESS style local polynomial regression. The number of basis functions or kernels can be explicitly defined which allows for faster and cheaper training and inference.

diff --git a/alphadia/calibration/property.py b/alphadia/calibration/property.py
@@ -1,5 +1,4 @@
 # native imports
-import os
 import logging
 import typing
 import pickle

diff --git a/alphadia/data/bruker.py b/alphadia/data/bruker.py
@@ -693,9 +693,9 @@ def assemble_push_intensity(
             len(unique_precursor_index)
         )
 
-        relative_precursor_index = precursor_index_reverse[precursor_index]
+        relative_precursor_index = precursor_index_reverse[precursor_index]  # noqa: F841  # TODO check for potential bug then remove this line
 
-        n_precursor_indices = len(unique_precursor_index)
+        n_precursor_indices = len(unique_precursor_index)  # noqa: F841  # TODO check for potential bug then remove this line
         n_tof_slices = len(tof_limits)
 
         # scan valuesa

diff --git a/alphadia/data/stats.py b/alphadia/data/stats.py
@@ -17,7 +17,7 @@ def log_stats(rt_values: np.array, cycle: np.array):
             DIA cycle object describing the msms pattern
     """
 
-    logger.info(f"============ Raw file stats ============")
+    logger.info("============ Raw file stats ============")
 
     rt_limits = rt_values.min() / 60, rt_values.max() / 60
     rt_duration_sec = rt_values.max() - rt_values.min()
@@ -41,4 +41,4 @@ def log_stats(rt_values: np.array, cycle: np.array):
 
     logger.info(f"{'MS2 range (m/z)':<20}: {msms_range[0]:.1f} - {msms_range[1]:.1f}")
 
-    logger.info(f"========================================")
+    logger.info("========================================")
diff --git a/alphadia/fdr.py b/alphadia/fdr.py
@@ -344,10 +344,10 @@ def plot_fdr(
     """
 
     y_test_proba = classifier.predict_proba(X_test)[:, 1]
-    y_test_pred = np.round(y_test_proba)
+    y_test_pred = np.round(y_test_proba)  # noqa: F841  # TODO check for potential bug then remove this line
 
     y_train_proba = classifier.predict_proba(X_train)[:, 1]
-    y_train_pred = np.round(y_train_proba)
+    y_train_pred = np.round(y_train_proba)  # noqa: F841  # TODO check for potential bug then remove this line
 
     fpr_test, tpr_test, thresholds_test = sklearn.metrics.roc_curve(
         y_test, y_test_proba

diff --git a/alphadia/features.py b/alphadia/features.py
@@ -466,10 +466,10 @@ def build_features(
     ).reshape(n_precursors, n_isotopes)
 
     # sum precursor
-    sum_precursor_intensity = np.sum(
+    sum_precursor_intensity = np.sum(  # noqa: F841  # TODO check for potential bug then remove this line
         np.sum(dense_precursors[0], axis=-1), axis=-1
     ).astype(np.float32)
-    sum_fragment_intensity = np.sum(
+    sum_fragment_intensity = np.sum(  # noqa: F841  # TODO check for potential bug then remove this line
         np.sum(dense_fragments[0], axis=-1), axis=-1
     ).astype(np.float32)
 
@@ -724,7 +724,7 @@ def fragment_features(
     quant_window: nb.uint32 = 3,
     quant_all: nb.boolean = False,
 ):
-    fragment_feature_dict = nb.typed.Dict.empty(
+    fragment_feature_dict = nb.typed.Dict.empty(  # noqa: F841  # TODO check for potential bug then remove this line
         key_type=nb.types.unicode_type, value_type=float_array
     )
 
@@ -780,7 +780,7 @@ def fragment_features(
 
     # (quant_window * 2 + 1)
     frame_rt_quant = frame_rt[center - quant_window : center + quant_window + 1]
-    quant_durarion = frame_rt_quant[-1] - frame_rt_quant[0]
+    quant_duration = frame_rt_quant[-1] - frame_rt_quant[0]  # noqa: F841  # TODO check for potential bug then remove this line
 
     # (quant_window * 2)
     delta_rt = frame_rt_quant[1:] - frame_rt_quant[:-1]
@@ -1020,7 +1020,7 @@ def profile_features(
 ):
     n_observations = len(observation_importance)
     # most intense observation across all observations
-    best_observation = np.argmax(observation_importance)
+    best_observation = np.argmax(observation_importance)  # noqa: F841  # TODO check for potential bug then remove this line
 
     fragment_idx_sorted = np.argsort(fragment_intensity)[::-1]
 
@@ -1203,8 +1203,8 @@ def reference_features(
         key_type=nb.types.unicode_type, value_type=nb.types.float32
     )
 
-    n_observation = reference_observation_importance.shape[0]
-    n_fragments = reference_fragments_scan_profile.shape[0]
+    n_observation = reference_observation_importance.shape[0]  # noqa: F841  # TODO check for potential bug then remove this line
+    n_fragments = reference_fragments_scan_profile.shape[0]  # noqa: F841  # TODO check for potential bug then remove this line
     fragment_idx_sorted = np.argsort(fragment_lib_intensity)[::-1]
 
     if (

diff --git a/alphadia/fragcomp.py b/alphadia/fragcomp.py
@@ -101,7 +101,6 @@ def compete_for_fragments(
     rt_window = rt[precursor_start_idx:precursor_stop_idx]
     valid_window = valid[precursor_start_idx:precursor_stop_idx]
 
-    k = 0
     for i, i_rt in enumerate(rt_window):
         if not valid_window[i]:
             continue

diff --git a/alphadia/numba/config.py b/alphadia/numba/config.py
@@ -140,7 +140,7 @@ def __init__(self):
             if not isinstance(value, type(getattr(self, key))):
                 try:
                     value = type(getattr(self, key))(value)
-                except Exception as e:
+                except Exception:
                     self.reporter.log_string(
                         f"Parameter {key} has wrong type {type(value)}",
                         verbosity="error",
@@ -151,7 +151,7 @@ def __init__(self):
                 if value.dtype != getattr(self, key).dtype:
                     try:
                         value = value.astype(getattr(self, key).dtype)
-                    except Exception as e:
+                    except Exception:
                         self.reporter.log_string(
                             f"Parameter {key} has wrong dtype {value.dtype}",
                             verbosity="error",

diff --git a/alphadia/numba/fragments.py b/alphadia/numba/fragments.py
@@ -9,8 +9,6 @@
 from numba.extending import overload_method
 import numpy as np
 
-import numba as nb
-
 
 @nb.experimental.jitclass()
 class FragmentContainer:

diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py
@@ -30,7 +30,6 @@
 import directlfq.protein_intensity_estimation as lfqprot_estimation
 import directlfq.config as lfqconfig
 
-import logging
 
 logger = logging.getLogger()
 
@@ -132,7 +131,6 @@ def accumulate_frag_df(
         quality_df = df[["precursor_idx", "ion", "correlation"]].copy()
         quality_df.rename(columns={"correlation": raw_name}, inplace=True)
 
-        df_list = []
         for raw_name, df in df_iterable:
             df = prepare_df(df, self.psm_df, column=self.column)
 
@@ -402,9 +400,9 @@ def build_transfer_model(self):
         tune_mgr = FinetuneManager(
             device=device, settings=self.config["transfer_learning"]
         )
-        stats = tune_mgr.finetune_rt(transfer_lib.precursor_df)
-        stats = tune_mgr.finetune_charge(transfer_lib.precursor_df)
-        stats = tune_mgr.finetune_ms2(
+        stats = tune_mgr.finetune_rt(transfer_lib.precursor_df)  # noqa: F841  # TODO check for potential bug then remove this line
+        stats = tune_mgr.finetune_charge(transfer_lib.precursor_df)  # noqa: F841  # TODO check for potential bug then remove this line
+        stats = tune_mgr.finetune_ms2(  # noqa: F841  # TODO check for potential bug then remove this line
             transfer_lib.precursor_df.copy(), transfer_lib.fragment_intensity_df.copy()
         )
 

diff --git a/alphadia/planning.py b/alphadia/planning.py
@@ -338,24 +338,24 @@ def run(
                 workflow.reporter.context.__exit__(None, None, None)
                 del workflow
 
-            except peptidecentric.CalibrationError as e:
+            except peptidecentric.CalibrationError:
                 # get full traceback
                 logger.error(
                     f"Search for {raw_name} failed as not enough precursors were found for calibration"
                 )
-                logger.error(f"This can have the following reasons:")
+                logger.error("This can have the following reasons:")
                 logger.error(
-                    f"   1. The sample was empty and therefore nor precursors were found"
+                    "   1. The sample was empty and therefore nor precursors were found"
                 )
-                logger.error(f"   2. The sample contains only very few precursors.")
+                logger.error("   2. The sample contains only very few precursors.")
                 logger.error(
-                    f"      For small libraries, try to set recalibration_target to a lower value"
+                    "      For small libraries, try to set recalibration_target to a lower value"
                 )
                 logger.error(
-                    f"      For large libraries, try to reduce the library size and reduce the calibration MS1 and MS2 tolerance"
+                    "      For large libraries, try to reduce the library size and reduce the calibration MS1 and MS2 tolerance"
                 )
                 logger.error(
-                    f"   3. There was a fundamental issue with search parameters"
+                    "   3. There was a fundamental issue with search parameters"
                 )
                 continue
             except Exception as e:

diff --git a/alphadia/plotting/cycle.py b/alphadia/plotting/cycle.py
@@ -1,12 +1,13 @@
 # native imports
+import os
 import typing
 
 # alphadia imports
 
 # alpha family imports
 
 # third party imports
-from matplotlib import cm, patches
+from matplotlib import patches
 import numpy as np
 import matplotlib.pyplot as plt
 

diff --git a/alphadia/transferlearning/train.py b/alphadia/transferlearning/train.py
@@ -4,7 +4,6 @@
 from tqdm import tqdm
 from torch.optim.lr_scheduler import LambdaLR
 from alphabase.peptide.fragment import remove_unused_fragments
-from alphabase.spectral_library.flat import *
 
 from alphadia.transferlearning.metrics import (
     MetricManager,
@@ -20,11 +19,9 @@
 from peptdeep.settings import global_settings
 from peptdeep.pretrained_models import ModelManager
 from peptdeep.model.model_interface import LR_SchedulerInterface, CallbackHandler
-from peptdeep.model.ms2 import normalize_fragment_intensities
 from peptdeep.model.charge import ChargeModelForModAASeq
 import logging
 
-from alphadia.workflow import reporting
 
 logger = logging.getLogger()
 

diff --git a/alphadia/utils.py b/alphadia/utils.py
@@ -423,8 +423,8 @@ def fourier_filter(dense_stack, kernel):
 
     """
 
-    k0 = kernel.shape[0]
-    k1 = kernel.shape[1]
+    k0 = kernel.shape[0]  # noqa: F841  # TODO check for potential bug then remove this line
+    k1 = kernel.shape[1]  # noqa: F841  # TODO check for potential bug then remove this line
 
     # make sure both dimensions are even
     scan_mod = dense_stack.shape[3] % 2

diff --git a/alphadia/workflow/config.py b/alphadia/workflow/config.py
@@ -433,7 +433,7 @@ def recursive_fill_table(
 
 def get_update_table(
     default_config: "Config", configs: List["Config"]
-) -> "pandas.DataFrame":
+) -> "pd.DataFrame":
     """
     Returns a table of the modifications happening to the config
     such that the rows are the keys and the columns are the experiments

diff --git a/alphadia/workflow/peptidecentric.py b/alphadia/workflow/peptidecentric.py
@@ -981,7 +981,7 @@ def requantify(self, psm_df):
             precursor_mz_column="mz_calibrated",
             fragment_mz_column="mz_calibrated",
             rt_column="rt_calibrated",
-            mobility_column=f"mobility_calibrated"
+            mobility_column="mobility_calibrated"
             if self.dia_data.has_mobility
             else "mobility_library",
         )
@@ -1029,7 +1029,7 @@ def requantify_fragments(
         """
 
         self.reporter.log_string(
-            f"=== Transfer learning quantification ===",
+            "=== Transfer learning quantification ===",
             verbosity="progress",
         )
 
@@ -1046,7 +1046,7 @@ def requantify_fragments(
         )
 
         self.reporter.log_string(
-            f"Calibrating library",
+            "Calibrating library",
             verbosity="info",
         )
 
@@ -1075,8 +1075,8 @@ def requantify_fragments(
             candidate_speclib_flat.precursor_df,
             candidate_speclib_flat.fragment_df,
             config=config,
-            precursor_mz_column=f"mz_calibrated",
-            fragment_mz_column=f"mz_calibrated",
+            precursor_mz_column="mz_calibrated",
+            fragment_mz_column="mz_calibrated",
         )
 
         # we disregard the precursors, as we want to keep the original scoring from the top12 search

diff --git a/alphadia/workflow/reporting.py b/alphadia/workflow/reporting.py
@@ -322,7 +322,7 @@ def __enter__(self):
         self.start_time = datetime.now().timestamp()
 
         # empty the file if it exists
-        with open(self.events_path, "w") as f:
+        with open(self.events_path, "w"):
             pass
 
         self.log_event("start", {})

diff --git a/pyproject.toml b/pyproject.toml
@@ -64,6 +64,8 @@ alphadia = "alphadia.cli:run"
 select =  [
     # pycodestyle
     "E",
+    # Pyflakes
+    "F",
 ]
 
 #    # Pyflakes

diff --git a/tests/performance_tests/diann_psm_extraction.py b/tests/performance_tests/diann_psm_extraction.py
@@ -11,7 +11,6 @@
 from alphadia.extraction.planning import Plan
 from alphadia.extraction.calibration import RunCalibration
 from alphadia.extraction.data import TimsTOFDIA
-from alphadia.test_data_downloader import DataShareDownloader
 from alphadia.extraction.scoring import (
     fdr_correction,
     unpack_fragment_info,

diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py
@@ -75,7 +75,6 @@ def mock_precursor_df(
             "charge": precursor_charge,
             "proteins": proteins,
             "genes": genes,
-            "decoy": decoy,
             "proba": proba,
             "qval": qval,
             "sequence": sequences,

diff --git a/tests/unit_tests/test_config_updater.py b/tests/unit_tests/test_config_updater.py
@@ -1,8 +1,9 @@
-from alphadia.workflow.config import *
 import yaml
 from io import StringIO
 import pandas as pd
 
+from alphadia.workflow.config import Config, get_update_table
+
 default_config = """
 version: 1
 

diff --git a/tests/unit_tests/test_fdr.py b/tests/unit_tests/test_fdr.py
@@ -87,7 +87,7 @@ def test_keep_best():
     )
 
 
-def test_keep_best():
+def test_keep_best_2():
     test_df = pd.DataFrame(
         {
             "channel": [0, 0, 0, 4, 4, 4, 8, 8, 8],
@@ -258,11 +258,11 @@ def test_feed_forward():
     # assert classifier.metrics["test_accuracy"][-1] > 0.99
     # assert classifier.metrics["train_accuracy"][-1] > 0.99
 
-    y_pred = classifier.predict(x)
-    # assert np.all(y_pred == y)
+    y_pred = classifier.predict(x)  # noqa: F841  # TODO check for potential bug then remove this line
+    # assert np.all(y_pred == y)  # TODO fix this test
 
-    y_proba = classifier.predict_proba(x)[:, 1]
-    # assert np.all(np.round(y_proba) == y)
+    y_proba = classifier.predict_proba(x)[:, 1]  # noqa: F841  # TODO check for potential bug then remove this line
+    # assert np.all(np.round(y_proba) == y)  # TODO fix this test
 
 
 def test_feed_forward_save():
@@ -285,5 +285,5 @@ def test_feed_forward_save():
         torch.load(os.path.join(tempfolder, "test_feed_forward_save.pth"))
     )
 
-    y_pred = new_classifier.predict(x)
-    # assert np.all(y_pred == y)
+    y_pred = new_classifier.predict(x)  # noqa: F841  # TODO check for potential bug then remove this line
+    # assert np.all(y_pred == y)  # TODO fix this test
diff --git a/tests/unit_tests/test_grouping.py b/tests/unit_tests/test_grouping.py
@@ -197,5 +197,5 @@ def test_grouping_fuzz(expected_time: int = 10):
     grouping_start_time = time.time()
     _ = grouping.perform_grouping(simulated_psm_data, genes_or_proteins="proteins")
     grouping_end_time = time.time()
-    elapsed_time = grouping_end_time - grouping_start_time
-    assert True
+    elapsed_time = grouping_end_time - grouping_start_time  # noqa: F841  # TODO check for potential bug then remove this line
+    assert True  # TODO fix this test
diff --git a/tests/unit_tests/test_reporting.py b/tests/unit_tests/test_reporting.py
@@ -1,6 +1,5 @@
 from matplotlib import pyplot as plt
 import numpy as np
-import tempfile
 import os
 import logging
 import time