Skip to content

Commit

Permalink
Reorganized modules
Browse files Browse the repository at this point in the history
  • Loading branch information
Kviatkovskii, Mikhail (Ext) committed Jul 7, 2024
1 parent fc9a8ff commit 9f86f20
Show file tree
Hide file tree
Showing 28 changed files with 217 additions and 200 deletions.
7 changes: 6 additions & 1 deletion src/bin/ringo_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@ use ringo::db::search::similarity_search;

fn main() {
let args: Vec<String> = std::env::args().collect();
let results = similarity_search(&args[1], &args[2], args[3].parse().unwrap(), args[4].parse().unwrap());
let results = similarity_search(
&args[1],
&args[2],
args[3].parse().unwrap(),
args[4].parse().unwrap(),
);
for result in results {
println!("{:?} {:?}", result.line, result.similarity);
}
Expand Down
3 changes: 1 addition & 2 deletions src/db.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
pub mod index_item;
pub mod index;
pub mod index_item;
pub mod search;
pub mod fingerprint;
8 changes: 3 additions & 5 deletions src/db/index.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@

use crate::db::index_item::IndexItem;
use crate::io::smiles::reader::molecule::parse_molecule;
use crate::model::fingerprint::FINGERPRINT_SIZE;
use bincode::encode_into_slice;
use std::fs::File;
use std::io::{BufRead, BufWriter, Write};
use crate::db::index_item::IndexItem;
use crate::db::fingerprint::FINGERPRINT_SIZE;
use crate::molecule::smiles::reader::molecule::parse_molecule;

#[cfg(windows)]
const LINE_ENDING_LENGTH: usize = 2;
Expand Down Expand Up @@ -36,7 +35,6 @@ pub fn index_file(smiles_file: &str) {
}
}


#[cfg(test)]
mod test {
use crate::db::index::index_file;
Expand Down
6 changes: 3 additions & 3 deletions src/db/index_item.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::db::fingerprint::Fingerprint;
use crate::model::fingerprint::Fingerprint;
use bincode::{Decode, Encode};

#[derive(Debug, Encode, Decode)]
Expand All @@ -9,11 +9,11 @@ pub struct IndexItem {

#[cfg(test)]
mod tests {
use crate::db::index_item::IndexItem;
use crate::model::fingerprint::Fingerprint;
use bincode::config::standard;
use bincode::{decode_from_slice, encode_to_vec};
use fixedbitset::FixedBitSet;
use crate::db::fingerprint::Fingerprint;
use crate::db::index_item::IndexItem;

#[test]
fn test_index_item_encode_decode() {
Expand Down
8 changes: 2 additions & 6 deletions src/db/search.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::math::similarity::tanimoto::tanimoto_bitset;
use crate::molecule::smiles::reader::molecule::parse_molecule;
use crate::db::index_item::IndexItem;
use crate::io::smiles::reader::molecule::parse_molecule;
use crate::math::similarity::tanimoto::tanimoto_bitset;
use std::fs::File;
use std::io::{BufRead, BufReader, Read, Seek};

Expand Down Expand Up @@ -67,10 +67,6 @@ pub fn similarity_search(
results
}

fn main() {
println!("db-search");
}

#[cfg(test)]
mod test {
use crate::db::index::index_file;
Expand Down
2 changes: 2 additions & 0 deletions src/descriptors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
mod fingerprints;
mod weight;
1 change: 1 addition & 0 deletions src/descriptors/fingerprints.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mod ecfp;
84 changes: 84 additions & 0 deletions src/descriptors/fingerprints/ecfp.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use crate::model::atom::Atom;
use crate::model::bond::Bond;
use crate::model::fingerprint::{Fingerprint, FINGERPRINT_SIZE};
use crate::model::molecule::Molecule;
use fixedbitset::FixedBitSet;
use petgraph::graph::NodeIndex;
use petgraph::prelude::{EdgeRef, StableGraph};
use petgraph::Undirected;
use std::hash::{DefaultHasher, Hasher};

impl Molecule {
    /// Computes an ECFP-style fingerprint of this molecule.
    ///
    /// Every atom of the molecular graph is used once as a starting point;
    /// from there `ecfp_recursive` walks paths of at most `radius` atoms and
    /// sets one bit per visited atom, derived from a running hash of the atom
    /// and bond features seen along the way.
    ///
    /// # Parameters
    ///
    /// * `radius` - maximum number of atoms on a walked path (the starting
    ///   atom counts as the first one, so `0` produces an empty fingerprint).
    /// * `fp_length` - number of leading bits hashes are folded into. The
    ///   returned bitset always holds `FINGERPRINT_SIZE` bits, so values above
    ///   `FINGERPRINT_SIZE` are capped to it, and `0` yields an empty
    ///   fingerprint.
    ///
    /// # Returns
    ///
    /// A [`Fingerprint`] wrapping a bitset of `FINGERPRINT_SIZE` bits.
    pub fn ecfp(&self, radius: usize, fp_length: usize) -> Fingerprint {
        let mut fp = FixedBitSet::with_capacity(FINGERPRINT_SIZE);

        // Bit positions are computed as `hash % fp_length`. Without this cap a
        // length above the bitset capacity would make `FixedBitSet::insert`
        // panic on an out-of-range bit, and a length of zero would panic on
        // the modulo itself.
        let fp_length = fp_length.min(FINGERPRINT_SIZE);
        if fp_length == 0 {
            return Fingerprint(fp);
        }

        for node in self.graph.node_indices() {
            // Each starting atom gets its own fresh hasher; depth is 1-based.
            ecfp_recursive(
                &self.graph,
                radius,
                1,
                node,
                &mut fp,
                fp_length,
                &mut DefaultHasher::new(),
            );
        }

        Fingerprint(fp)
    }
}

/// Walks the molecular graph depth-first from `node`, setting one fingerprint
/// bit per visited atom.
///
/// * `graph` - the molecule's atom/bond graph.
/// * `radius` - maximum value of `depth` that is still processed.
/// * `depth` - position of `node` on the current path; callers start at `1`.
/// * `node` - atom to process on this call.
/// * `fp` - bitset receiving the bits.
/// * `fp_length` - modulus applied to the hash to obtain the bit index.
/// * `hasher` - running hash of everything fed in so far on this traversal.
///
/// Panics if `node` is not part of `graph` or if `fp_length` is zero.
/// `FixedBitSet::insert` also panics when the computed bit index is not below
/// the capacity of `fp`.
///
/// NOTE(review): the hasher is never rewound when a branch returns, so the
/// hash seen by a later neighbour also contains the features of earlier
/// sibling branches; the walk also steps back over the bond it arrived by.
/// Confirm both are intended.
fn ecfp_recursive(
    graph: &StableGraph<Atom, Bond, Undirected>,
    radius: usize,
    depth: usize,
    node: NodeIndex,
    fp: &mut FixedBitSet,
    fp_length: usize,
    hasher: &mut DefaultHasher,
) {
    // Anything deeper than `radius` contributes nothing.
    if radius < depth {
        return;
    }

    // Feed this atom's features into the running hash, in a fixed order.
    let current = graph.node_weight(node).unwrap();
    hasher.write_u8(current.element.atomic_number);
    hasher.write_u8(current.isotope);
    hasher.write_i8(current.charge);
    hasher.write_u8(current.hs);

    // `finish` is taken on a clone so the running state keeps accumulating.
    let bit = (hasher.clone().finish() as usize) % fp_length;
    fp.insert(bit);

    for incident in graph.edges(node) {
        hasher.write_u8(incident.weight().order as u8);

        // Pick whichever endpoint of the bond is not the current atom.
        let (from, to) = (incident.source(), incident.target());
        let neighbour = if from == node { to } else { from };

        ecfp_recursive(graph, radius, depth + 1, neighbour, fp, fp_length, hasher);
    }
}

#[cfg(test)]
mod test {
    use crate::io::smiles::reader::molecule::parse_molecule;
    use crate::math::similarity::tanimoto::tanimoto_bitset;

    /// Checks that the Tanimoto similarity between the ECFP fingerprints
    /// (radius 2, length 128) of ibuprofen and naproxen stays strictly
    /// between 0.53 and 0.54.
    #[test]
    fn test_ecfp() {
        let (_, ibuprofen) = parse_molecule("CC(C)CC1=CC=C(C=C1)C(C)C(=O)O").unwrap();
        let (_, naproxen) = parse_molecule("CC(C1=CC2=C(C=C1)C=C(C=C2)OC)C(=O)O").unwrap();

        let fp_ibuprofen = ibuprofen.ecfp(2, 128);
        let fp_naproxen = naproxen.ecfp(2, 128);

        let sim = tanimoto_bitset(&fp_ibuprofen.0, &fp_naproxen.0);
        assert!(sim > 0.53 && sim < 0.54);
    }
}
11 changes: 11 additions & 0 deletions src/descriptors/weight.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
use crate::model::molecule::Molecule;

impl Molecule {
    /// Returns the sum of `atomic_weight()` over the element of every atom in
    /// the molecular graph; an empty graph gives `0.0`.
    ///
    /// NOTE(review): only the atoms' own elements are summed; hydrogen counts
    /// stored on atoms (`hs`) are not added here. Confirm that is intended.
    pub fn weight(&self) -> f64 {
        self.graph
            .node_weights()
            .fold(0.0, |total, atom| total + atom.element.atomic_weight())
    }
}
1 change: 1 addition & 0 deletions src/io.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub(crate) mod smiles;
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use crate::molecule::model::atom::Atom;
use crate::molecule::model::element::Element;
use crate::molecule::smiles::reader::charge::parse_charge;
use crate::molecule::smiles::reader::element::parse_element;
use crate::molecule::smiles::reader::hydrogens::parse_hydrogens;
use crate::molecule::smiles::reader::isotope::parse_isotope;
use crate::io::smiles::reader::charge::parse_charge;
use crate::io::smiles::reader::element::parse_element;
use crate::io::smiles::reader::hydrogens::parse_hydrogens;
use crate::io::smiles::reader::isotope::parse_isotope;
use crate::model::atom::Atom;
use crate::model::element::Element;
use nom::combinator::opt;
use nom::IResult;

Expand Down Expand Up @@ -47,7 +47,7 @@ pub fn parse_atom(input: &str) -> IResult<&str, Atom> {

#[cfg(test)]
mod tests {
use crate::molecule::smiles::reader::atom::parse_atom;
use crate::io::smiles::reader::atom::parse_atom;

fn do_test_parse_atom(input: &str, atomic_number: u8, charge: i8, hs: u8, isotope: u8) {
let (remaining_input, atom) = parse_atom(input).unwrap();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::molecule::model::bond::{Bond, BondOrder};
use crate::model::bond::{Bond, BondOrder};
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::combinator::map;
Expand All @@ -16,8 +16,8 @@ pub fn parse_bond(input: &str) -> IResult<&str, Bond> {

#[cfg(test)]
mod tests {
use crate::ringo::molecule::model::bond::BondOrder;
use crate::ringo::molecule::smiles::reader::bond::parse_bond;
use crate::io::smiles::reader::bond::parse_bond;
use crate::model::bond::BondOrder;

#[test]
fn parse_bond_empty() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub fn parse_charge(input: &str) -> IResult<&str, i8> {

#[cfg(test)]
mod tests {
use crate::ringo::molecule::smiles::reader::charge::parse_charge;
use crate::io::smiles::reader::charge::parse_charge;
use nom::error::{Error, ErrorKind};

#[test]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub fn parse_element(input: &str) -> IResult<&str, u8> {

#[cfg(test)]
mod tests {
use crate::ringo::molecule::smiles::reader::element::parse_element;
use crate::io::smiles::reader::element::parse_element;
use nom::error::{Error, ErrorKind};

#[test]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pub fn parse_hydrogens(input: &str) -> IResult<&str, u8> {

#[cfg(test)]
mod tests {
use crate::ringo::molecule::smiles::reader::hydrogens::parse_hydrogens;
use crate::io::smiles::reader::hydrogens::parse_hydrogens;
use nom::error::{Error, ErrorKind};

#[test]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub fn parse_isotope(input: &str) -> IResult<&str, u8> {

#[cfg(test)]
mod tests {
use crate::ringo::molecule::smiles::reader::isotope::parse_isotope;
use crate::io::smiles::reader::isotope::parse_isotope;
use nom::error::{Error, ErrorKind};

#[test]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::molecule::model::bond::{Bond, BondOrder};
use crate::molecule::model::molecule::Molecule;
use crate::molecule::smiles::reader::atom::parse_atom;
use crate::molecule::smiles::reader::bond::parse_bond;
use crate::io::smiles::reader::atom::parse_atom;
use crate::io::smiles::reader::bond::parse_bond;
use crate::model::bond::{Bond, BondOrder};
use crate::model::molecule::Molecule;
use nom::branch::alt;
use nom::character::complete::{char, digit1};
use nom::combinator::{map, map_res};
Expand Down Expand Up @@ -90,8 +90,8 @@ pub fn parse_molecule(input: &str) -> IResult<&str, Molecule> {

#[cfg(test)]
mod tests {
use crate::ringo::molecule::model::bond::BondOrder;
use crate::ringo::molecule::smiles::reader::molecule::parse_molecule;
use crate::io::smiles::reader::molecule::parse_molecule;
use crate::model::bond::BondOrder;
use petgraph::stable_graph::{EdgeIndex, NodeIndex};

#[test]
Expand Down
4 changes: 3 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
pub mod db;
mod descriptors;
mod io;
mod math;
mod molecule;
mod model;
1 change: 1 addition & 0 deletions src/molecule/model.rs → src/model.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod atom;
pub mod bond;
pub mod element;
pub mod fingerprint;
pub mod molecule;
2 changes: 1 addition & 1 deletion src/molecule/model/atom.rs → src/model/atom.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::molecule::model::element::Element;
use crate::model::element::Element;

#[derive(Hash, Eq, PartialEq, Debug)]
pub struct Atom {
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion src/molecule/model/element.rs → src/model/element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ impl Element {

#[cfg(test)]
mod test {
use crate::ringo::molecule::model::element::Element;
use crate::model::element::Element;

#[test]
fn test_element() {
Expand Down
2 changes: 1 addition & 1 deletion src/db/fingerprint.rs → src/model/fingerprint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ impl<'de> bincode::BorrowDecode<'de> for Fingerprint {

#[cfg(test)]
mod tests {
use crate::db::fingerprint::{Fingerprint, FINGERPRINT_SIZE};
use crate::model::fingerprint::{Fingerprint, FINGERPRINT_SIZE};
use fixedbitset::FixedBitSet;

#[test]
Expand Down
Loading

0 comments on commit 9f86f20

Please sign in to comment.