Skip to content

Commit

Permalink
introduce modification_mapping_type
Browse files Browse the repository at this point in the history
  • Loading branch information
mschwoer committed Nov 20, 2024
1 parent f0e5d7b commit c3ed0df
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 38 deletions.
32 changes: 20 additions & 12 deletions alphabase/constants/const_files/psm_reader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,8 @@ alphapept:
'raw_name': 'raw_name' # parsed from the `ms_data.hdf` file
'fdr': 'q_value'
'decoy': 'decoy'
modification_mapping:
'Carbamidomethyl@C': 'cC'
'Oxidation@M': 'oxM'
'Phospho@S': 'pS'
'Phospho@T': 'pT'
'Phospho@Y': 'pY'
'Acetyl@Protein_N-term': 'a'
modification_mapping_type: 'alphapept'


maxquant:
reader_type: maxquant
Expand Down Expand Up @@ -49,7 +44,10 @@ maxquant:
'genes': ['Gene Names','Gene names']
'decoy': 'Reverse'
'intensity': 'Intensity'
modification_mapping:
modification_mapping_type: 'maxquant'

modification_mappings:
maxquant:
'Dimethyl@K':
- 'K(Dimethyl)'
'Dimethyl@R':
Expand Down Expand Up @@ -103,6 +101,13 @@ maxquant:
'Deamidated@Q': ['Q(Deamidation (NQ))','Q(de)']
'GlyGly@K': ['K(GlyGly (K))', 'K(gl)']
'hydroxyisobutyryl@K': 'K(2-)'
alphapept:
'Carbamidomethyl@C': 'cC'
'Oxidation@M': 'oxM'
'Phospho@S': 'pS'
'Phospho@T': 'pT'
'Phospho@Y': 'pY'
'Acetyl@Protein_N-term': 'a'

pfind:
reader_type: pfind
Expand All @@ -119,6 +124,7 @@ pfind:
'uniprot_ids': 'Proteins'
'fdr': 'Q-value'
'decoy': ['Target/Decoy', 'Targe/Decoy']
modification_mapping_type: 'maxquant'

msfragger_pepxml:
reader_type: msfragger_pepxml
Expand Down Expand Up @@ -146,6 +152,7 @@ msfragger_pepxml:
- 'Dimethyl@K' # Any_N-term is not needed here as it will be inferred on the fly
- 'Methyl@E' #an example of a PTM that can be C-term
mod_mass_tol: 0.1 # Da
modification_mapping_type: 'maxquant'

diann:
reader_type: diann
Expand All @@ -168,7 +175,7 @@ diann:
'fdr': 'Q.Value'
mod_seq_columns:
- "Modified.Sequence"
modification_mapping: 'maxquant'
modification_mapping_type: 'maxquant'

spectronaut_report:
reader_type: spectronaut_report
Expand All @@ -184,7 +191,7 @@ spectronaut_report:
'charge': 'charge'
mod_seq_columns:
- 'ModifiedSequence'
modification_mapping: 'maxquant'
modification_mapping_type: 'maxquant'

spectronaut:
reader_type: spectronaut
Expand All @@ -208,7 +215,7 @@ spectronaut:
- 'ModifiedPeptideSequence'
- 'LabeledSequence'
- 'FullUniModPeptideName'
modification_mapping: 'maxquant'
modification_mapping_type: 'maxquant'

library_reader_base:
reader_type: library_reader_base
Expand Down Expand Up @@ -239,7 +246,7 @@ library_reader_base:
- 'FullUniModPeptideName'
- 'LabeledSequence'
- 'FullUniModPeptideName'
modification_mapping: 'maxquant'
modification_mapping_type: 'maxquant'

sage:
reader_type: sage
Expand All @@ -258,3 +265,4 @@ sage:
'peptide_fdr': 'peptide_q'
'protein_fdr': 'protein_q'
'decoy': 'is_decoy'
modification_mapping_type: 'maxquant'
1 change: 0 additions & 1 deletion alphabase/psm_reader/alphapept_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ class AlphaPeptReader(PSMReaderBase):
"""Reader for AlphaPept's *.ms_data.hdf files."""

_reader_type = "alphapept"
_modification_type = "alphapept"

def _load_file(self, filename: str) -> pd.DataFrame:
"""Load an AlphaPept output file to a DataFrame."""
Expand Down
1 change: 0 additions & 1 deletion alphabase/psm_reader/maxquant_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ class MaxQuantReader(PSMReaderBase):

_reader_type = "maxquant"
_add_unimod_to_mod_mapping = True
_modification_type = "maxquant"

def __init__( # noqa: PLR0913, D417 # too many arguments in function definition, missing argument descriptions
self,
Expand Down
32 changes: 13 additions & 19 deletions alphabase/psm_reader/modification_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def __init__(
custom_modification_mapping: Optional[Dict[str, str]],
*,
reader_yaml: Dict,
modification_type: Optional[str],
mapping_type: str,
add_unimod_to_mod_mapping: bool,
):
"""Initialize the ModificationMapper.
Expand All @@ -35,7 +35,7 @@ def __init__(
reader_yaml:
the yaml (read from file) containing the modification mappings
modification_type:
mapping_type:
the type of modification mapping ("maxquant" or "alphapept")
add_unimod_to_mod_mapping:
Expand All @@ -44,7 +44,7 @@ def __init__(
"""
self._psm_reader_yaml = reader_yaml
self._add_unimod_to_mod_mapping = add_unimod_to_mod_mapping
self._modification_type = modification_type
self._mapping_type = mapping_type

self.modification_mapping = None
self.rev_mod_mapping = None
Expand Down Expand Up @@ -102,16 +102,13 @@ def set_modification_mapping(
if modification_mapping is None:
self._init_modification_mapping()
elif isinstance(
modification_mapping, str
): # TODO: remove this overloading of the parameter by introducing yaml key "modification_mapping_type"
if modification_mapping in self._psm_reader_yaml:
self.modification_mapping = self._psm_reader_yaml[modification_mapping][
"modification_mapping"
]
else:
raise ValueError(
f"Unknown modification mapping: {modification_mapping}"
)
modification_mapping,
str, # interpret as modification_mapping_type
):
self.modification_mapping = self._psm_reader_yaml["modification_mappings"][
modification_mapping
]

else:
self.modification_mapping = copy.deepcopy(modification_mapping)

Expand All @@ -125,12 +122,9 @@ def set_modification_mapping(

def _init_modification_mapping(self) -> None:
"""Initialize the modification mapping from the psm_reader_yaml or as an empty dictionary."""
if self._modification_type is not None:
self.modification_mapping = self._psm_reader_yaml[self._modification_type][
"modification_mapping"
]
else:
self.modification_mapping = {}
self.modification_mapping = self._psm_reader_yaml["modification_mappings"][
self._mapping_type
]

def _add_all_unimod(self) -> None:
"""Add all unimod modifications to the modification mapping."""
Expand Down
8 changes: 3 additions & 5 deletions alphabase/psm_reader/psm_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ class PSMReaderBase(ABC):
_reader_type: str
# whether to add the unimod mappings to the modification mapping
_add_unimod_to_mod_mapping: bool = False
# the typ of modification mapping to be used
_modification_type: Optional[str] = None
# whether 'rt_norm' values in self._psm_dd will be normalized using min/max values
# Useful to normalize iRT values as they contain negative values.
_min_max_rt_norm = False
Expand Down Expand Up @@ -127,9 +125,9 @@ def __init__( # noqa: PLR0913 # too many arguments
self._modification_mapper = ModificationMapper(
modification_mapping,
reader_yaml=copy.deepcopy(psm_reader_yaml),
modification_type=psm_reader_yaml[self._reader_type].get(
"modification_mapping_type", None
),
mapping_type=psm_reader_yaml[self._reader_type][
"modification_mapping_type"
],
add_unimod_to_mod_mapping=self._add_unimod_to_mod_mapping,
)

Expand Down
2 changes: 2 additions & 0 deletions tests/integration/test_psm_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,8 @@ def _assert_reference_df_equal(
def test_psm_reader_yaml() -> None:
"""Test that all column mappings in the psm_reader.yaml are covered by string constant keys."""
for reader_config in psm_reader_yaml.values():
if reader_config == "modification_mappings":
continue
ks = [k for k in reader_config["column_mapping"]]
assert (
set(ks) - set(PsmDfCols.get_values()) - set(LibPsmDfCols.get_values())
Expand Down

0 comments on commit c3ed0df

Please sign in to comment.