From 379bc103369fca2e7e978b9f8470137daec746d5 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 18 Feb 2025 09:20:24 +0000 Subject: [PATCH] Backport fixes from PR #299. [ci skip] --- corelib/src/libs/SireIO/amberrst7.cpp | 9 +++ corelib/src/libs/SireIO/sdf.cpp | 3 + doc/source/changelog.rst | 9 +++ tests/conftest.py | 5 ++ tests/convert/test_rdkit.py | 31 ++++++++ tests/io/test_sdf.py | 7 ++ wrapper/Convert/SireRDKit/sire_rdkit.cpp | 95 +++++++++++++++++++++++- 7 files changed, 158 insertions(+), 1 deletion(-) diff --git a/corelib/src/libs/SireIO/amberrst7.cpp b/corelib/src/libs/SireIO/amberrst7.cpp index a0f8bc3eb..d9ce93f9f 100644 --- a/corelib/src/libs/SireIO/amberrst7.cpp +++ b/corelib/src/libs/SireIO/amberrst7.cpp @@ -357,6 +357,15 @@ void AmberRst7::parse(const PropertyMap &map) // we have already validated that there are enough lines const QString &line = l[linenum]; + if (natoms <= 2 and line.trimmed().isEmpty()) + { + // if there are only two atoms, then the final line might not be reserved + // for velocities + vels.clear(); + + return; + } + if (line.length() < column + 36) { errors.append(QObject::tr("Cannot read the velocities for the atom at index %1 as " diff --git a/corelib/src/libs/SireIO/sdf.cpp b/corelib/src/libs/SireIO/sdf.cpp index f263575f5..b18fbb4bc 100644 --- a/corelib/src/libs/SireIO/sdf.cpp +++ b/corelib/src/libs/SireIO/sdf.cpp @@ -1898,6 +1898,9 @@ MolEditor SDF::getMolecule(int imol, const PropertyMap &map) const mol.setProperty(map["sdf_counts"], SireBase::wrap(sdf_counts)); } + // set the name of the molecule + mol.rename(MolName(sdfmol.name)); + return mol.setProperty(map["coordinates"], coords) .setProperty(map["formal_charge"], charges) .setProperty(map["element"], elements) diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index 68574dc7e..16b3d3782 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -12,6 +12,15 @@ Development was migrated into the `OpenBioSim `__ organisation on `GitHub `__. +`2024.4.2 `__ - Feb 2025 +---------------------------------------------------------------------------------------- + +* Preserve molecule name when reading from SDF format. + +* Handle missing velocity data for ``AMBER`` RST7 files with only a few atoms. + +* Preserve SDF metadata when converting to ``RDKIt`` format. + `2024.4.1 `__ - Feb 2025 ---------------------------------------------------------------------------------------- diff --git a/tests/conftest.py b/tests/conftest.py index f0c82a7d1..8077aae0b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -243,3 +243,8 @@ def openmm_platform(): @pytest.fixture(scope="session") def thrombin_complex(): return sr.load_test_files("thrombin.top", "thrombin.rst7") + + +@pytest.fixture(scope="session") +def tagged_sdf(): + return sr.load_test_files("sdf_tags.sdf")[0] diff --git a/tests/convert/test_rdkit.py b/tests/convert/test_rdkit.py index df6b8d76c..e3189f0fa 100644 --- a/tests/convert/test_rdkit.py +++ b/tests/convert/test_rdkit.py @@ -179,3 +179,34 @@ def test_rdkit_force_infer(): assert bond == "SINGLE" assert bond_infer == "TRIPLE" + + +@pytest.mark.skipif( + "rdkit" not in sr.convert.supported_formats(), + reason="rdkit support is not available", +) +def test_rdkit_sdf_tags(tagged_sdf): + + # store the sdf data property + sdf_data = tagged_sdf.property("sdf_data") + + # convert to rdkit + rdmol = sr.convert.to_rdkit(tagged_sdf) + + # convert back to sire + mol = sr.convert.to_sire(rdmol) + + # get the rdf data property + rdkit_data = mol.property("rdkit_data") + + # check that the data is the same + for prop in sdf_data.keys(): + assert prop in rdkit_data.keys() + assert sdf_data[prop] == rdkit_data[prop] + + # convert back to rdkit + rdmol2 = sr.convert.to_rdkit(mol) + + # check that the data is the same on the rdkit molecule + for prop in rdmol.GetPropNames(): + assert rdmol.GetProp(prop) == rdmol2.GetProp(prop) diff --git a/tests/io/test_sdf.py b/tests/io/test_sdf.py index b5bd8b881..646de1d76 100644 --- a/tests/io/test_sdf.py +++ b/tests/io/test_sdf.py @@ -41,3 +41,10 @@ def test_charge(): # Read back in and check that the charges are still correct. for c0, c1 in zip(mol.property("formal_charge").to_list(), mapping.values()): assert isclose(c0.value(), c1) + + +def test_name(tagged_sdf): + """ + Make sure that the molecule takes its name from the SDF title field. + """ + assert tagged_sdf.name().value() == tagged_sdf.property("name").value() diff --git a/wrapper/Convert/SireRDKit/sire_rdkit.cpp b/wrapper/Convert/SireRDKit/sire_rdkit.cpp index 35294e201..9a2cf79ca 100644 --- a/wrapper/Convert/SireRDKit/sire_rdkit.cpp +++ b/wrapper/Convert/SireRDKit/sire_rdkit.cpp @@ -597,8 +597,71 @@ namespace SireRDKit RDKit::RWMol molecule; molecule.beginBatchEdit(); + // set the name of the molecule molecule.setProp("_Name", mol.name().value().toStdString()); + // set any SDF tags as properties + std::string sdf_tag; + if (mol.hasProperty("sdf_data")) + { + const auto sdf_data = mol.property("sdf_data").asA(); + + for (const auto &tag : sdf_data.propertyKeys()) + { + try + { + molecule.setProp(tag.toStdString(), sdf_data.property(tag).asAString().toStdString()); + } + catch (...) + { + const auto string_array = sdf_data.property(tag).asA(); + + QString string; + for (int i=0; i(tag.toStdString(), string.toStdString()); + } + } + } + + // set and existing RDKit data as properties + std::string rdkit_tag; + if (mol.hasProperty("rdkit_data")) + { + const auto rdkit_data = mol.property("rdkit_data").asA(); + + for (const auto &tag : rdkit_data.propertyKeys()) + { + try + { + molecule.setProp(tag.toStdString(), rdkit_data.property(tag).asAString().toStdString()); + } + catch (...) + { + const auto string_array = rdkit_data.property(tag).asA(); + + QString string; + for (int i=0; i(tag.toStdString(), string.toStdString()); + } + } + } + const auto atoms = mol.atoms(); QList elements; @@ -793,7 +856,6 @@ namespace SireRDKit try { RDKit::MolOps::sanitizeMol(molecule); - } catch (...) { @@ -997,6 +1059,37 @@ namespace SireRDKit } } + // copy additional properties from the molecule + SireBase::Properties props; + + for (const auto &prop : mol->getPropList()) + { + const auto sire_prop = QString::fromStdString(prop); + + // skip internal properties + if (sire_prop.startsWith("_")) + continue; + + const auto value = QString::fromStdString(mol->getProp(prop)); + const auto list = value.split("\n"); + + // there is a list of values + if (list.count() > 1) + { + props.setProperty(sire_prop, SireBase::wrap(list)); + } + // there is a single value + else + { + props.setProperty(sire_prop, SireBase::wrap(value)); + } + } + + if (not props.isEmpty()) + { + molecule.setProperty("rdkit_data", props); + } + return molecule.commit(); }