Skip to content

Commit

Permalink
messy(muropeptide): revolting fragment printing
Browse files Browse the repository at this point in the history
  • Loading branch information
TheLostLambda committed Jul 9, 2024
1 parent afb6b00 commit 1eb631b
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 38 deletions.
142 changes: 122 additions & 20 deletions crates/muropeptide/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ struct AminoAcid {
lateral_chain: Option<LateralChain>,
}

// FIXME: This should store the `BondId` of each connection, so we know when it's removed!
#[derive(Clone, Debug, Eq, PartialEq)]
enum Connection {
GlycosidicBond,
Expand Down Expand Up @@ -136,35 +137,66 @@ impl<'s, 'a: 's, 'p: 's> Dissociable<'s, 'a, 'p> for Muropeptide<'a, 'p> {
.iter()
.map(|Monomer { glycan, peptide }| {
// FIXME: Likely inefficient, with linear search and not HashSets? Also this should be a function...
let glycan = glycan
let glycan: Vec<_> = glycan
.iter()
.copied()
.filter(|id| !lost_residues.contains(id))
.collect();
let peptide = peptide
let mut lateral_peptides = Vec::new();
let peptide: Vec<_> = peptide
.iter()
.filter(|aa| !lost_residues.contains(&aa.residue))
.map(|aa| {
.filter_map(|aa| {
let residue = aa.residue;
let lateral_chain = aa.lateral_chain.as_ref().map(|chain| {
let peptide = chain
.peptide
if !lost_residues.contains(&residue) {
let lateral_chain = aa.lateral_chain.as_ref().and_then(|chain| {
let peptide: Vec<_> = chain
.peptide
.iter()
.copied()
.filter(|id| !lost_residues.contains(id))
.collect();
if peptide.is_empty() {
None
} else {
Some(LateralChain {
direction: chain.direction,
peptide,
})
}
});
Some(AminoAcid {
residue,
lateral_chain,
})
} else if let Some(LateralChain { peptide, .. }) = &aa.lateral_chain {
let peptide: Vec<_> = peptide
.iter()
.copied()
.filter(|id| !lost_residues.contains(id))
.collect();
LateralChain {
direction: chain.direction,
peptide,
if !peptide.is_empty() {
lateral_peptides.push(peptide);
}
});
AminoAcid {
residue,
lateral_chain,
None
} else {
None
}
})
.collect();
Monomer { glycan, peptide }
if peptide.is_empty() && glycan.is_empty() && !lateral_peptides.is_empty() {
// FIXME: Probably not true eventually...
assert_eq!(lateral_peptides.len(), 1);
let peptide = lateral_peptides[0]
.iter()
.map(|&residue| AminoAcid {
residue,
lateral_chain: None,
})
.collect();
Monomer { glycan, peptide }
} else {
Monomer { glycan, peptide }
}
})
.collect();
let connections = self.connections.clone();
Expand All @@ -179,23 +211,88 @@ impl<'s, 'a: 's, 'p: 's> Dissociable<'s, 'a, 'p> for Muropeptide<'a, 'p> {
// FIXME: Oh god.
impl Display for Muropeptide<'_, '_> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
for monomer in &self.monomers {
display_monomer(f, &self.polymer, monomer)?;
let connections = self.connections.clone();
let mut cross_links = Vec::new();

for (i, left_monomer) in self.monomers.iter().enumerate() {
if left_monomer.glycan.is_empty() && left_monomer.peptide.is_empty() {
continue;
}
display_monomer(f, &self.polymer, left_monomer)?;
if let Some(right_monomer) = self.monomers.get(i + 1) {
if right_monomer.glycan.is_empty() && right_monomer.peptide.is_empty() {
continue;
}
if let Some(connection) = connections.get(i) {
match connection {
Connection::GlycosidicBond => write!(f, "~")?,
Connection::Crosslink(descriptors) => {
write!(f, "=")?;
assert_eq!(descriptors.len(), 1);
cross_links.push(descriptors[0].to_string());
}
Connection::Both(descriptors) => {
write!(f, "~=")?;
assert_eq!(descriptors.len(), 1);
cross_links.push(descriptors[0].to_string());
}
}
}
}
}

let cross_links = cross_links.join(", ");
if !cross_links.is_empty() {
write!(f, " ({cross_links})")?;
}

Ok(())
}
}

impl Display for CrosslinkDescriptor {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
Self::DonorAcceptor(l, r) => write!(f, "{l}-{r}"),
Self::AcceptorDonor(l, r) => write!(f, "{l}={r}"),
}
}
}

// FIXME: Change all of these individual functions into a trait, then implement it for all of the sub-components. The
// trait could be called something like `DisplayMoiety` and could be a bit like the `ValidateInto` trait?
fn display_monomer(f: &mut Formatter, polymer: &Polymer, monomer: &Monomer) -> fmt::Result {
let Monomer { glycan, peptide } = monomer;
for &monosaccharide in glycan {
display_residue(f, polymer, monosaccharide)?;
}
for _amino_acid in peptide {
todo!();
if !glycan.is_empty() && !peptide.is_empty() {
write!(f, "-")?;
}
for amino_acid in peptide {
display_residue(f, polymer, amino_acid.residue)?;
if let Some(chain) = &amino_acid.lateral_chain {
write!(f, "[")?;
for &residue in &chain.peptide {
display_residue(f, polymer, residue)?;
}
write!(f, "]")?;
}
}
// TODO: Bring this back once I have more ion types, but this just noise for now (when it's all protons)
// let unlocalised_modifications = polymer
// .modification_refs()
// .filter_map(|info| {
// if let ModificationInfo::Unlocalized(modification) = info {
// Some(modification.to_string())
// } else {
// None
// }
// })
// .join(", ");
// if !unlocalised_modifications.is_empty() {
// write!(f, " ({unlocalised_modifications})")?;
// }
Ok(())
}

Expand All @@ -220,7 +317,12 @@ fn display_residue(f: &mut Formatter, polymer: &Polymer, residue: ResidueId) ->
};
modification.to_string()
});
let modifications = named_mods.chain(offset_mods).join(", ");
let modifications = named_mods
.chain(offset_mods)
// FIXME: Awful hacks to format things in a way Steph likes...
.map(|m| m.replace("Red", "r").replace("-H2O", ""))
.filter(|m| !m.is_empty())
.join(", ");

if modifications.is_empty() {
write!(f, "{abbr}")
Expand Down
18 changes: 18 additions & 0 deletions crates/polychem/src/polymers/polymer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,24 @@ impl<'a, 'p> Polymer<'a, 'p> {
self.modifications.get(&id)
}

// FIXME: This is part of an incomplete set of methods — be sure to add in methods for iterating over IDs and
// also just values? Needs more design thought in general! For example, should I have a method that just returns
// unlocalized modifications?
// FIXME: Also needs testing!
pub fn modifications(
&self,
) -> impl Iterator<Item = (ModificationId, &ModificationInfo<'a, 'p>)> {
self.modifications.iter().map(|(&id, info)| (id, info))
}

// FIXME: This is part of an incomplete set of methods — be sure to add in methods for iterating over IDs and
// also just values? Needs more design thought in general! For example, should I have a method that just returns
// unlocalized modifications?
// FIXME: Also needs testing!
pub fn modification_refs(&self) -> impl Iterator<Item = &ModificationInfo<'a, 'p>> {
self.modifications.values()
}

pub fn new_chain(
&mut self,
abbr: impl AsRef<str>,
Expand Down
27 changes: 14 additions & 13 deletions crates/smithereens/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![type_length_limit = "18554191"]

use std::{
cmp::{self, Ordering},
cmp::Ordering,
convert::identity,
fmt::{self, Display, Formatter},
hash::{Hash, Hasher},
Expand Down Expand Up @@ -265,14 +265,17 @@ impl<'p> Fragment<'p> {
// FIXME: I'm pretty sure this assumption holds, but does a fragmentation depth equalling the degree / valency of
// the graph mean 100% of fragments will be generated at that depth? If so, this saves a lot of time wasted on
// "deeper" fragmentations that have no chance of generating a unique fragment!
fn degree(&self) -> usize {
self.residues
.iter()
.flatten()
.map(|residue| residue.bonds.len())
.max()
.unwrap_or_default()
}
// FIXME: Update... This does *not* hold... It will ensure that you have the set you need to create all fragments
// after finding their intersections, but some larger pieces (several nodes) might collectively have more
// connections to the rest of the graph than the graph's degree...
// fn degree(&self) -> usize {
// self.residues
// .iter()
// .flatten()
// .map(|residue| residue.bonds.len())
// .max()
// .unwrap_or_default()
// }

// PERF: This could be memoized, but I don't know how much that would gain me if I'm not adding sub-fragmentations
// to the cache... It would only help if the user has duplicate structures they are asking to fragment!
Expand All @@ -283,12 +286,10 @@ impl<'p> Fragment<'p> {
let mut fragments = HashSet::new();

let max_depth = max_depth.unwrap_or(usize::MAX);
let useful_depth = cmp::min(self.degree(), max_depth);

for depth in 0..=useful_depth {
for depth in 0..=max_depth {
while let Some(next) = processing.pop() {
// FIXME: Can I avoid this clone?
if fragments.insert(next.clone()) && depth < useful_depth {
if fragments.insert(next.clone()) && depth < max_depth {
processing_queue.extend(next.cut_each_bond().flat_map(divide_fragment));
}
}
Expand Down
14 changes: 9 additions & 5 deletions src/bin/pgmass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,15 @@ fn pg_info(formula: &str) -> Result<String> {

// FIXME: Remove after debugging is finished!
writeln!(buf, "Fragments:").unwrap();
for fragment in muropeptide.fragment(None) {
// SAFETY: Fragments are currently always charged, so this `unwrap()` is safe!
let mz = fragment.monoisotopic_mz().unwrap();
writeln!(buf, "\n{fragment}").unwrap();
writeln!(buf, "Ion (1+): {mz}").unwrap();
let mut fragments: Vec<_> = muropeptide
.fragment(None)
.map(|fragment| (fragment.to_string(), fragment.monoisotopic_mz().unwrap()))
.collect();
fragments
.sort_unstable_by(|(s1, mz1), (s2, mz2)| mz1.cmp(mz2).then_with(|| s1.cmp(s2)).reverse());
fragments.dedup();
for (structure, mz) in fragments {
writeln!(buf, r#""{structure}",{mz:.6}"#).unwrap();
}

Ok(buf)
Expand Down

0 comments on commit 1eb631b

Please sign in to comment.