From c3ed0dfafee161e0c25324bd68809016da1edae5 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Wed, 20 Nov 2024 15:08:58 +0100 Subject: [PATCH] introduce modification_mapping_type --- .../constants/const_files/psm_reader.yaml | 32 ++++++++++++------- alphabase/psm_reader/alphapept_reader.py | 1 - alphabase/psm_reader/maxquant_reader.py | 1 - alphabase/psm_reader/modification_mapper.py | 32 ++++++++----------- alphabase/psm_reader/psm_reader.py | 8 ++--- tests/integration/test_psm_readers.py | 2 ++ 6 files changed, 38 insertions(+), 38 deletions(-) diff --git a/alphabase/constants/const_files/psm_reader.yaml b/alphabase/constants/const_files/psm_reader.yaml index bde674ab..6c8443b2 100644 --- a/alphabase/constants/const_files/psm_reader.yaml +++ b/alphabase/constants/const_files/psm_reader.yaml @@ -13,13 +13,8 @@ alphapept: 'raw_name': 'raw_name' #parse from `ms_data.hdf`` file 'fdr': 'q_value' 'decoy': 'decoy' - modification_mapping: - 'Carbamidomethyl@C': 'cC' - 'Oxidation@M': 'oxM' - 'Phospho@S': 'pS' - 'Phospho@T': 'pT' - 'Phospho@Y': 'pY' - 'Acetyl@Protein_N-term': 'a' + modification_mapping_type: 'alphapept' + maxquant: reader_type: maxquant @@ -49,7 +44,10 @@ maxquant: 'genes': ['Gene Names','Gene names'] 'decoy': 'Reverse' 'intensity': 'Intensity' - modification_mapping: + modification_mapping_type: 'maxquant' + +modification_mappings: + maxquant: 'Dimethyl@K': - 'K(Dimethyl)' 'Dimethyl@R': @@ -103,6 +101,13 @@ maxquant: 'Deamidated@Q': ['Q(Deamidation (NQ))','Q(de)'] 'GlyGly@K': ['K(GlyGly (K))', 'K(gl)'] 'hydroxyisobutyryl@K': 'K(2-)' + alphapept: + 'Carbamidomethyl@C': 'cC' + 'Oxidation@M': 'oxM' + 'Phospho@S': 'pS' + 'Phospho@T': 'pT' + 'Phospho@Y': 'pY' + 'Acetyl@Protein_N-term': 'a' pfind: reader_type: pfind @@ -119,6 +124,7 @@ pfind: 'uniprot_ids': 'Proteins' 'fdr': 'Q-value' 'decoy': ['Target/Decoy', 'Targe/Decoy'] + modification_mapping_type: 'maxquant' msfragger_pepxml: reader_type: msfragger_pepxml @@ 
-146,6 +152,7 @@ msfragger_pepxml: - 'Dimethyl@K' # Any_N-term is not needed here as it will be infered in-the-fly - 'Methyl@E' #an example of a PTM that can be C-term mod_mass_tol: 0.1 # Da + modification_mapping_type: 'maxquant' diann: reader_type: diann @@ -168,7 +175,7 @@ diann: 'fdr': 'Q.Value' mod_seq_columns: - "Modified.Sequence" - modification_mapping: 'maxquant' + modification_mapping_type: 'maxquant' spectronaut_report: reader_type: spectronaut_report @@ -184,7 +191,7 @@ spectronaut_report: 'charge': 'charge' mod_seq_columns: - 'ModifiedSequence' - modification_mapping: 'maxquant' + modification_mapping_type: 'maxquant' spectronaut: reader_type: spectronaut @@ -208,7 +215,7 @@ spectronaut: - 'ModifiedPeptideSequence' - 'LabeledSequence' - 'FullUniModPeptideName' - modification_mapping: 'maxquant' + modification_mapping_type: 'maxquant' library_reader_base: reader_type: library_reader_base @@ -239,7 +246,7 @@ library_reader_base: - 'FullUniModPeptideName' - 'LabeledSequence' - 'FullUniModPeptideName' - modification_mapping: 'maxquant' + modification_mapping_type: 'maxquant' sage: reader_type: sage @@ -258,3 +265,4 @@ sage: 'peptide_fdr': 'peptide_q' 'protein_fdr': 'protein_q' 'decoy': 'is_decoy' + modification_mapping_type: 'maxquant' diff --git a/alphabase/psm_reader/alphapept_reader.py b/alphabase/psm_reader/alphapept_reader.py index 5c1a332d..b3efba1d 100644 --- a/alphabase/psm_reader/alphapept_reader.py +++ b/alphabase/psm_reader/alphapept_reader.py @@ -52,7 +52,6 @@ class AlphaPeptReader(PSMReaderBase): """Reader for AlphaPept's *.ms_data.hdf files.""" _reader_type = "alphapept" - _modification_type = "alphapept" def _load_file(self, filename: str) -> pd.DataFrame: """Load an AlphaPept output file to a DataFrame.""" diff --git a/alphabase/psm_reader/maxquant_reader.py b/alphabase/psm_reader/maxquant_reader.py index 798c86c1..101efcd8 100644 --- a/alphabase/psm_reader/maxquant_reader.py +++ b/alphabase/psm_reader/maxquant_reader.py @@ -128,7 +128,6 @@ 
class MaxQuantReader(PSMReaderBase): _reader_type = "maxquant" _add_unimod_to_mod_mapping = True - _modification_type = "maxquant" def __init__( # noqa: PLR0913, D417 # too many arguments in function definition, missing argument descriptions self, diff --git a/alphabase/psm_reader/modification_mapper.py b/alphabase/psm_reader/modification_mapper.py index 355ee08f..534e2610 100644 --- a/alphabase/psm_reader/modification_mapper.py +++ b/alphabase/psm_reader/modification_mapper.py @@ -15,7 +15,7 @@ def __init__( custom_modification_mapping: Optional[Dict[str, str]], *, reader_yaml: Dict, - modification_type: Optional[str], + mapping_type: str, add_unimod_to_mod_mapping: bool, ): """Initialize the ModificationMapper. @@ -35,7 +35,7 @@ def __init__( reader_yaml: the yaml (read from file) containing the modification mappings - modification_type: + mapping_type: the type of modification mapping ("maxquant" or "alphapept") add_unimod_to_mod_mapping: @@ -44,7 +44,7 @@ def __init__( """ self._psm_reader_yaml = reader_yaml self._add_unimod_to_mod_mapping = add_unimod_to_mod_mapping - self._modification_type = modification_type + self._mapping_type = mapping_type self.modification_mapping = None self.rev_mod_mapping = None @@ -102,16 +102,13 @@ def set_modification_mapping( if modification_mapping is None: self._init_modification_mapping() elif isinstance( - modification_mapping, str - ): # TODO: remove this overloading of the parameter by introducing yaml key "modification_mapping_type" - if modification_mapping in self._psm_reader_yaml: - self.modification_mapping = self._psm_reader_yaml[modification_mapping][ - "modification_mapping" - ] - else: - raise ValueError( - f"Unknown modification mapping: {modification_mapping}" - ) + modification_mapping, + str, # interpret as modification_mapping_type + ): + self.modification_mapping = self._psm_reader_yaml["modification_mappings"][ + modification_mapping + ] + else: self.modification_mapping = 
copy.deepcopy(modification_mapping) @@ -125,12 +122,9 @@ def set_modification_mapping( def _init_modification_mapping(self) -> None: """Initialize the modification mapping from the psm_reader_yaml or as an empty dictionary.""" - if self._modification_type is not None: - self.modification_mapping = self._psm_reader_yaml[self._modification_type][ - "modification_mapping" - ] - else: - self.modification_mapping = {} + self.modification_mapping = self._psm_reader_yaml["modification_mappings"][ + self._mapping_type + ] def _add_all_unimod(self) -> None: """Add all unimod modifications to the modification mapping.""" diff --git a/alphabase/psm_reader/psm_reader.py b/alphabase/psm_reader/psm_reader.py index edb8538d..4c3e9aec 100644 --- a/alphabase/psm_reader/psm_reader.py +++ b/alphabase/psm_reader/psm_reader.py @@ -36,8 +36,6 @@ class PSMReaderBase(ABC): _reader_type: str # whether to add the unimod mappings to the modification mapping _add_unimod_to_mod_mapping: bool = False - # the typ of modification mapping to be used - _modification_type: Optional[str] = None # whether 'rt_norm' values in self._psm_dd will be normalized using min/max values # Useful to normalize iRT values as they contain negative values. 
_min_max_rt_norm = False @@ -127,9 +125,9 @@ def __init__( # noqa: PLR0913 # too many arguments self._modification_mapper = ModificationMapper( modification_mapping, reader_yaml=copy.deepcopy(psm_reader_yaml), - modification_type=psm_reader_yaml[self._reader_type].get( - "modification_mapping_type", None - ), + mapping_type=psm_reader_yaml[self._reader_type][ + "modification_mapping_type" + ], add_unimod_to_mod_mapping=self._add_unimod_to_mod_mapping, ) diff --git a/tests/integration/test_psm_readers.py b/tests/integration/test_psm_readers.py index f7c28f8f..e33ef8f8 100644 --- a/tests/integration/test_psm_readers.py +++ b/tests/integration/test_psm_readers.py @@ -229,6 +229,8 @@ def _assert_reference_df_equal( def test_psm_reader_yaml() -> None: """Test that all column mappings in the psm_reader.yaml are covered by string constant keys.""" for reader_config in psm_reader_yaml.values(): + if reader_config is psm_reader_yaml["modification_mappings"]: # shared mappings section, not a reader config + continue ks = [k for k in reader_config["column_mapping"]] assert ( set(ks) - set(PsmDfCols.get_values()) - set(LibPsmDfCols.get_values())