From 36ccc7d6c23e38424bb9b9ec1cb74eaf59e0a935 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Thu, 21 Nov 2024 09:53:40 +0100 Subject: [PATCH] refactor MSFraggerReader --- alphabase/psm_reader/modification_mapper.py | 4 ++-- alphabase/psm_reader/msfragger_reader.py | 23 ++++++++++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/alphabase/psm_reader/modification_mapper.py b/alphabase/psm_reader/modification_mapper.py index 534e2610..b8a3ce52 100644 --- a/alphabase/psm_reader/modification_mapper.py +++ b/alphabase/psm_reader/modification_mapper.py @@ -95,8 +95,8 @@ def set_modification_mapping( ---------- modification_mapping: If dictionary: the current modification_mapping will be overwritten by this. - If str: the parameter will be interpreted as a reader type, and the modification_mapping is read from the - "modification_mapping" section of the psm_reader_yaml + If str: the parameter will be interpreted as a modification_mapping_type, and the mapping is read from the + respective key in the "modification_mappings" section of the psm_reader_yaml """ if modification_mapping is None: diff --git a/alphabase/psm_reader/msfragger_reader.py b/alphabase/psm_reader/msfragger_reader.py index c2d05056..cc9eb0da 100644 --- a/alphabase/psm_reader/msfragger_reader.py +++ b/alphabase/psm_reader/msfragger_reader.py @@ -21,12 +21,11 @@ def _is_fragger_decoy(proteins: List[str]) -> bool: return all(prot.lower().startswith("rev_") for prot in proteins) -mass_mapped_mods = psm_reader_yaml["msfragger_pepxml"]["mass_mapped_mods"] -mod_mass_tol = psm_reader_yaml["msfragger_pepxml"]["mod_mass_tol"] - - def _get_mods_from_masses( # noqa: PLR0912, C901 too many branches, too complex TODO: refactor - sequence: str, msf_aa_mods: List[str] + sequence: str, + msf_aa_mods: List[str], + mass_mapped_mods: List[str], + mod_mass_tol: float, ) -> Tuple[str, str, str, str]: mods = [] mod_sites = [] @@ -134,7 +133,10 @@ def __init__( # noqa: PLR0913, D417 # too many arguments in function definition rt_unit=rt_unit, **kwargs, ) - self.keep_unknown_aa_mass_diffs = keep_unknown_aa_mass_diffs + self._keep_unknown_aa_mass_diffs = keep_unknown_aa_mass_diffs + # TODO: should those be set via API, too? + self._mass_mapped_mods = psm_reader_yaml["msfragger_pepxml"]["mass_mapped_mods"] + self._mod_mass_tol = psm_reader_yaml["msfragger_pepxml"]["mod_mass_tol"] def _translate_modifications(self) -> None: pass @@ -183,11 +185,16 @@ def _load_modifications(self, origin_df: pd.DataFrame) -> None: self._psm_df[PsmDfCols.AA_MASS_DIFF_SITES], ) = zip( *origin_df[["peptide", "modifications"]].apply( - lambda x: _get_mods_from_masses(*x), axis=1 + lambda x: _get_mods_from_masses( + *x, + mass_mapped_mods=self._mass_mapped_mods, + mod_mass_tol=self._mod_mass_tol, + ), + axis=1, ) ) - if not self.keep_unknown_aa_mass_diffs: + if not self._keep_unknown_aa_mass_diffs: self._psm_df[PsmDfCols.TO_REMOVE] += ( self._psm_df[PsmDfCols.AA_MASS_DIFFS] != "" )