Skip to content

Commit

Permalink
Merge pull request #178 from OpenBioSim/fix_177
Browse files Browse the repository at this point in the history
Fix 177
  • Loading branch information
chryswoods authored Mar 14, 2024
2 parents fe79146 + 517c203 commit d9d8f24
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 28 deletions.
3 changes: 3 additions & 0 deletions doc/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ organisation on `GitHub <https://github.com/openbiosim/sire>`__.
customise the list of elements that are considered biological. This
fixes issue #170.

* Fixed a bug in the algorithm used to infer bond order when converting to
RDKit format. This fixes issue #177.

* Please add an item to this changelog when you create your PR

`2023.5.1 <https://github.com/openbiosim/sire/compare/2023.5.0...2023.5.1>`__ - January 2024
Expand Down
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,16 @@ def pdbx_3nss():
return None


@pytest.fixture(scope="session")
def ejm55_sdf():
return sr.load_test_files("ejm55.sdf")


@pytest.fixture(scope="session")
def ejm55_gro():
return sr.load_test_files("ejm55.gro", "ejm55.top")


@pytest.fixture(scope="session")
def testfile_cache_dir():
import os
Expand Down
23 changes: 23 additions & 0 deletions tests/convert/test_rdkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,26 @@ def test_rdkit_returns_null():
# this should be the default
with pytest.raises(ValueError):
mol = sr.smiles("c3cc[c+]2cc(C1CCCC1)[nH]c2c3")


@pytest.mark.skipif(
"rdkit" not in sr.convert.supported_formats(),
reason="rdkit support is not available",
)
def test_rdkit_infer_bonds(ejm55_sdf, ejm55_gro):
sdf = ejm55_sdf[0].molecule()
gro = ejm55_gro["not (protein or water)"].molecule()

assert sdf.smiles() == gro.smiles()

match_sdf = sdf["smarts [NX3][CX3](=[OX1])[#6]"]
match_gro = gro["smarts [NX3][CX3](=[OX1])[#6]"]

print(match_sdf)
print(match_gro)

assert len(match_sdf) == 1
assert len(match_gro) == 1

for s, g in zip(match_sdf, match_gro):
assert s.number() == g.number()
124 changes: 96 additions & 28 deletions wrapper/Convert/SireRDKit/sire_rdkit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -408,27 +408,67 @@ namespace SireRDKit

for (auto atom : molecule.atoms())
{
atoms.append(std::make_pair(get_nb_unpaired_electrons(*atom),
atom));
if (atom->getAtomicNum() > 1)
{
atoms.append(std::make_pair(get_nb_unpaired_electrons(*atom),
atom));
}
}

// sort these atoms so that the ones with most unpaired electrons
// come first
std::sort(atoms.begin(), atoms.end(),
[](const std::pair<QVector<int>, RDKit::Atom *> &atom0,
const std::pair<QVector<int>, RDKit::Atom *> &atom1)
{
return std::get<0>(atom0).at(0) > std::get<0>(atom1).at(0);
});
std::stable_sort(atoms.begin(), atoms.end(),
[](const std::pair<QVector<int>, RDKit::Atom *> &atom0,
const std::pair<QVector<int>, RDKit::Atom *> &atom1)
{
return std::get<0>(atom0).at(0) > std::get<0>(atom1).at(0);
});

for (const auto &p : atoms)
{
// number of unpaired electrons
auto nue = std::get<0>(p);

// for this atom...
RDKit::Atom *atom = std::get<1>(p);

if (atom->getDegree() == 0)
{
// no neighbors, so no bonds - this could be a monovalent cation
switch (atom->getAtomicNum())
{
case 3:
case 11:
case 19:
case 37:
case 47:
case 55:
atom->setFormalCharge(1);
break;
case 12:
case 20:
case 29:
case 30:
case 38:
case 56:
case 26:
atom->setFormalCharge(2);
break;
case 13:
// Fe could also be + 3
atom->setFormalCharge(3);
break;
default:
// no, it is an anion - use the negative of the number of
// unpaired electrons
atom->setFormalCharge(-get_nb_unpaired_electrons(*atom)[0]);
break;
}

molecule.updatePropertyCache(false);
continue;
}

// number of unpaired electrons
auto nue = get_nb_unpaired_electrons(*atom);

// if there's only one possible valence state and the corresponding
// nue is negative, it means we can only add a positive charge to
// the atom
Expand All @@ -454,26 +494,26 @@ namespace SireRDKit
n));
}

std::sort(neighbors.begin(), neighbors.end(),
[](const std::pair<QVector<int>, RDKit::Atom *> &atom0,
const std::pair<QVector<int>, RDKit::Atom *> &atom1)
{
return std::get<0>(atom0).at(0) > std::get<0>(atom1).at(0);
});

int min_nue = 0;
std::stable_sort(neighbors.begin(), neighbors.end(),
[](const std::pair<QVector<int>, RDKit::Atom *> &atom0,
const std::pair<QVector<int>, RDKit::Atom *> &atom1)
{
return std::get<0>(atom0).at(0) > std::get<0>(atom1).at(0);
});

for (const auto n : nue)
for (int i = 0; i < neighbors.count(); ++i)
{
if (n > 0)
int min_nue = 0;

for (const auto n : nue)
{
if (min_nue == 0 or n < min_nue)
min_nue = n;
if (n > 0)
{
if (min_nue == 0 or n < min_nue)
min_nue = n;
}
}
}

for (int i = 0; i < neighbors.count(); ++i)
{
const auto na_nue = std::get<0>(neighbors.at(i));
auto neighbor = std::get<1>(neighbors.at(i));

Expand Down Expand Up @@ -506,13 +546,41 @@ namespace SireRDKit
// recalculate the nue for this atom
nue = get_nb_unpaired_electrons(*atom);
}

bool any_equal_zero = false;

for (auto n : nue)
{
if (n == 0)
{
any_equal_zero = true;
break;
}
}

if (any_equal_zero)
{
// we can stop here
break;
}
}
}
} // end of loop over neighbours

// recalculate the nue for this atom
nue = get_nb_unpaired_electrons(*atom);

if (nue[0] > 0)
bool any_equal_zero = false;

for (auto n : nue)
{
if (n == 0)
{
any_equal_zero = true;
break;
}
}

if (not any_equal_zero)
{
// transform it to a negative charge
atom->setFormalCharge(-nue[0]);
Expand Down

0 comments on commit d9d8f24

Please sign in to comment.