Skip to content

Commit

Permalink
feat(muropeptide): awful start to pretty-printing
Browse files Browse the repository at this point in the history
  • Loading branch information
TheLostLambda committed Jul 8, 2024
1 parent e167d5d commit afb6b00
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 25 deletions.
113 changes: 106 additions & 7 deletions crates/muropeptide/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@

mod parser;

use std::fmt::{self, Display, Formatter};

use itertools::Itertools;
use miette::Diagnostic;
use nom_miette::{final_parser, LabeledError};
use parser::{muropeptide, MuropeptideErrorKind};
// FIXME: Blocks need separating and reordering!
use polychem::{
errors::PolychemError, AverageMass, BondId, Charged, Massive, MonoisotopicMass, Polymer,
Polymerizer, ResidueId,
errors::PolychemError, AverageMass, BondId, Charged, GroupState, Massive, ModificationInfo,
MonoisotopicMass, Polymer, Polymerizer, ResidueId,
};
use smithereens::Dissociable;
use thiserror::Error;
Expand Down Expand Up @@ -115,18 +118,114 @@ impl Charged for Muropeptide<'_, '_> {
}
}

impl Dissociable for Muropeptide<'_, '_> {
fn polymer(&self) -> &Polymer {
impl<'s, 'a: 's, 'p: 's> Dissociable<'s, 'a, 'p> for Muropeptide<'a, 'p> {
fn polymer(&self) -> &Polymer<'a, 'p> {
// Hand out a borrow of the polymer stored in this muropeptide.
&self.polymer
}

// Builds the `Muropeptide` describing a single fragment: `fragmented_polymer` becomes the new polymer, and every
// residue listed in `lost_residues` is dropped from the glycans, peptide stems, and lateral chains of the monomers.
// `_broken_bonds` is accepted but not used yet.
// FIXME: Hideous — the same "drop lost residues" filter is repeated three times and should become a helper.
fn new_fragment(
&self,
_fragmented_polymer: Polymer,
_lost_residues: Vec<ResidueId>,
fragmented_polymer: Polymer<'a, 'p>,
lost_residues: Vec<ResidueId>,
_broken_bonds: Vec<BondId>,
) -> Self {
todo!()
// FIXME: Obviously incomplete!
let monomers = self
.monomers
.iter()
.map(|Monomer { glycan, peptide }| {
// FIXME: Likely inefficient, with linear search and not HashSets? Also this should be a function...
// Keep only the monosaccharides that survived fragmentation.
let glycan = glycan
.iter()
.copied()
.filter(|id| !lost_residues.contains(id))
.collect();
// An amino acid whose own residue was lost is dropped together with its whole lateral chain.
let peptide = peptide
.iter()
.filter(|aa| !lost_residues.contains(&aa.residue))
.map(|aa| {
let residue = aa.residue;
// Surviving amino acids keep their lateral chain (if any), minus the lost residues. A chain that
// loses every residue is kept as an empty `LateralChain` rather than turned into `None`.
let lateral_chain = aa.lateral_chain.as_ref().map(|chain| {
let peptide = chain
.peptide
.iter()
.copied()
.filter(|id| !lost_residues.contains(id))
.collect();
LateralChain {
direction: chain.direction,
peptide,
}
});
AminoAcid {
residue,
lateral_chain,
}
})
.collect();
Monomer { glycan, peptide }
})
.collect();
// Connections are copied over unchanged; they are not filtered against `lost_residues` or `_broken_bonds`.
let connections = self.connections.clone();
Self {
polymer: fragmented_polymer,
monomers,
connections,
}
}
}

// FIXME: Placeholder `Display` impl — it just writes the monomers back-to-back and needs a proper rewrite.
impl Display for Muropeptide<'_, '_> {
    /// Writes every monomer of this muropeptide to `f`, in stored order and with no separator between them.
    ///
    /// Formatting stops at the first monomer that fails to print, and that error is returned.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        self.monomers
            .iter()
            .try_for_each(|monomer| display_monomer(f, &self.polymer, monomer))
    }
}

// FIXME: Change all of these individual functions into a trait, then implement it for all of the sub-components. The
// trait could be called something like `DisplayMoiety` and could be a bit like the `ValidateInto` trait?
/// Writes a single monomer to `f`, looking its residues up in `polymer`.
///
/// The glycan's monosaccharides are written first, in order, via `display_residue`.
///
/// # Panics
///
/// Peptide stems are not supported yet: after the glycan has been written, a monomer with at least one amino acid
/// hits a `todo!()`.
fn display_monomer(f: &mut Formatter, polymer: &Polymer, monomer: &Monomer) -> fmt::Result {
    monomer
        .glycan
        .iter()
        .copied()
        .try_for_each(|monosaccharide| display_residue(f, polymer, monosaccharide))?;

    // Amino-acid printing is still unimplemented, so bail out loudly as soon as there is one to print.
    if monomer.peptide.iter().next().is_some() {
        todo!();
    }

    Ok(())
}

// FIXME: Messy — this leans on `unwrap()` / `unreachable!()` and builds intermediate `String`s; tidy it up.
/// Writes one residue to `f` as its abbreviation, followed by its modifications in parentheses (if it has any).
///
/// The modifications of functional groups in the `GroupState::Modified` state are listed first, by abbreviation,
/// followed by the residue's offset modifications. They are joined with `", "`, giving `ABBR(mod, mod, ...)`.
/// A residue without modifications is written as just `ABBR`.
///
/// # Panics
///
/// Panics if `residue` or one of its modification ids cannot be found in `polymer`. It also panics if a
/// functional-group modification is not `ModificationInfo::Named`, or an offset modification is not
/// `ModificationInfo::Offset`.
fn display_residue(f: &mut Formatter, polymer: &Polymer, residue: ResidueId) -> fmt::Result {
    // FIXME: What to do about the unwrap()? Is that fine here?
    let residue = polymer.residue(residue).unwrap();

    // Abbreviations of the modifications attached to this residue's functional groups.
    let named = residue
        .functional_groups()
        .filter_map(|(_, state)| match *state {
            GroupState::Modified(id) => match polymer.modification(id).unwrap() {
                ModificationInfo::Named(modification, _) => Some(modification.abbr().to_owned()),
                _ => unreachable!(),
            },
            _ => None,
        });

    // Textual form of every offset modification applied to the residue.
    let offsets = residue
        .offset_modifications()
        .map(|id| match polymer.modification(id).unwrap() {
            ModificationInfo::Offset(modification, _) => modification.to_string(),
            _ => unreachable!(),
        });

    let abbr = residue.abbr();
    match named.chain(offsets).join(", ").as_str() {
        "" => write!(f, "{abbr}"),
        modifications => write!(f, "{abbr}({modifications})"),
    }
}

Expand Down
3 changes: 3 additions & 0 deletions crates/polychem/src/moieties/modification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ impl<K: Charged> Charged for Modification<K> {
}
}

// FIXME: These `Display` impls happen to be useful to me now, in the midst of this sickeningly messy code, but they
// probably shouldn't belong here... I don't think I should be providing `Display` impls for anything that I'm not also
// providing parsers for... At some point, this should probably be removed and moved to the `muropeptide` crate...
fn display_offset_modification(
f: &mut Formatter<'_>,
multiplier: Count,
Expand Down
1 change: 1 addition & 0 deletions crates/polychem/src/moieties/residue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ impl<'a, 'p> Residue<'a, 'p> {
self.name
}

// FIXME: Should I have a version of this method that filters for named modifications?
/// Returns an iterator over this residue's functional groups, each paired with a reference to its current
/// [`GroupState`].
pub fn functional_groups(&self) -> impl Iterator<Item = (&FunctionalGroup<'p>, &GroupState)> {
self.functional_groups.iter()
}
Expand Down
47 changes: 30 additions & 17 deletions crates/smithereens/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,28 +154,36 @@ impl NodeMapping {
// to be implemented for unsized types, just without those methods, but I don't think this trait makes much sense
// without a `.fragment()` method (which currently requires a `Self: Sized`), so I've elected to make the whole trait
// `Sized` for now
pub trait Dissociable: Sized {
// FIXME: Figure out why borrowck is demanding `'a: 'p` here... That is a correct bound, it's just not something that
// any other part of the code has required? Is it because this is a trait?
// FIXME: Furthermore, I have genuinely no clue what's up with the 's being needed here... Try to remove it and see how
// that goes...
pub trait Dissociable<'s, 'a: 'p + 's, 'p: 's>: Sized {
// TODO: Add a method for checking if Self's polymer is currently in one piece or if it's already disconnected
/// Returns the polymer that `fragment()` takes apart.
#[must_use]
fn polymer(&self) -> &Polymer;
fn polymer(&self) -> &Polymer<'a, 'p>;
// FIXME: Naming?
/// Builds the `Self` value for one fragment of `self`.
///
/// `fragment()` calls this once per generated piece, passing the three values returned by
/// `build_fragment_ion`: the fragmented polymer, the ids of the residues that were removed from it, and the ids of
/// the bonds that were broken.
#[must_use]
fn new_fragment(
&self,
fragmented_polymer: Polymer,
fragmented_polymer: Polymer<'a, 'p>,
lost_residues: Vec<ResidueId>,
broken_bonds: Vec<BondId>,
) -> Self;
// FIXME: Christ... That's messy...
/// Generates the fragments of `self`, lazily converting each piece into a `Self` via `new_fragment()`.
///
/// `max_depth` is forwarded unchanged to `Fragment::fragment` — presumably `None` means "no depth limit"; confirm
/// against that method.
#[must_use]
fn fragment(&self) -> Vec<Self> {
todo!()
}
// FIXME: Only for testing! Remove!
fn dbg_fragment(&self) {
fn fragment(&'s self, max_depth: Option<usize>) -> impl Iterator<Item = Self> {
// FIXME: It's the closure capturing this `&'s Polymer` that leads to issues...
let polymer = self.polymer();
let node_mapping = NodeMapping::new(polymer);
let fragment = Fragment::new(&node_mapping, polymer);
eprintln!("\nPieces: {}", fragment.fragment(None).len());
// The pieces are produced up front by `Fragment::fragment`; only the conversion into `Self` happens lazily.
Fragment::new(&node_mapping, polymer)
.fragment(max_depth)
.into_iter()
// `move` hands `node_mapping` over to the closure so the returned iterator can outlive this function body.
.map(move |piece| {
let (fragmented_polymer, lost_residues, broken_bonds) =
piece.build_fragment_ion(&node_mapping, polymer);
self.new_fragment(fragmented_polymer, lost_residues, broken_bonds)
})
}
}

Expand Down Expand Up @@ -211,8 +219,9 @@ impl<'p> Fragment<'p> {
self,
node_mapping: &NodeMapping,
polymer: &Polymer<'a, 'p>,
) -> Polymer<'a, 'p> {
) -> (Polymer<'a, 'p>, Vec<ResidueId>, Vec<BondId>) {
let mut fragmented_polymer = polymer.clone();
let mut lost_residues = Vec::new();

for (id, opt_residue) in self.residues.into_iter().enumerate() {
let residue_id = node_mapping.0[id];
Expand All @@ -231,22 +240,26 @@ impl<'p> Fragment<'p> {
.unwrap();
}
} else {
// FIXME: Need to build up a list of the lost residues and broken bonds to return!
// FIXME: Not a fan of this mutable state approach... Could I do this with a `.map()`?
lost_residues.push(residue_id);
fragmented_polymer.remove_residue(residue_id);
}
}

for id in self.broken_bonds {
// FIXME: Need to build up a list of the lost residues and broken bonds to return!
fragmented_polymer.remove_bond(id);
}
// FIXME: Should this be returning an Iterator instead of a `Vec`?
let broken_bonds = self
.broken_bonds
.into_iter()
.filter_map(|id| fragmented_polymer.remove_bond(id).map(|_| id))
.collect();

// FIXME: Hard-coded to generate the 1+ ions!
// SAFETY: `p` is a valid formula, so this shouldn't panic
fragmented_polymer
.new_offset(OffsetKind::Add, 1, "p")
.unwrap();
fragmented_polymer

(fragmented_polymer, lost_residues, broken_bonds)
}

// FIXME: I'm pretty sure this assumption holds, but does a fragmentation depth equalling the degree / valency of
Expand Down
8 changes: 7 additions & 1 deletion src/bin/pgmass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,13 @@ fn pg_info(formula: &str) -> Result<String> {
writeln!(buf).unwrap();

// FIXME: Remove after debugging is finished!
muropeptide.dbg_fragment();
writeln!(buf, "Fragments:").unwrap();
for fragment in muropeptide.fragment(None) {
// SAFETY: Fragments are currently always charged, so this `unwrap()` is safe!
let mz = fragment.monoisotopic_mz().unwrap();
writeln!(buf, "\n{fragment}").unwrap();
writeln!(buf, "Ion (1+): {mz}").unwrap();
}

Ok(buf)
}
Expand Down

0 comments on commit afb6b00

Please sign in to comment.