Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix mol objects bug #40

Merged
merged 4 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -249,5 +249,24 @@
"long",
]
},
{
"name": "bust -- memory problem",
"type": "debugpy",
"request": "launch",
"cwd": "${workspaceFolder}",
"module": "posebusters",
"justMyCode": true,
"stopOnEntry": false,
"args": [
"tests/conftest/1W1P_GIO/1W1P_GIO_ligand.sdf",
"-l",
"tests/conftest/1W1P_GIO/1W1P_GIO_ligands.sdf",
"-p",
"tests/conftest/1W1P_GIO/1W1P_GIO_protein.pdb",
"--full-report",
"--outfmt",
"long",
]
},
]
}
2 changes: 1 addition & 1 deletion posebusters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
"check_volume_overlap",
]

__version__ = "0.2.13"
__version__ = "0.2.14"
37 changes: 25 additions & 12 deletions posebusters/modules/sucos.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module to calculate SuCOS score."""

from __future__ import annotations

import os
Expand All @@ -8,7 +9,7 @@
from rdkit.Chem.FeatMaps import FeatMaps
from rdkit.Chem.rdchem import Mol
from rdkit.Chem.rdMolChemicalFeatures import BuildFeatureFactory
from rdkit.Chem.rdmolops import SanitizeMol
from rdkit.Chem.rdmolops import AddHs, RemoveHs, SanitizeMol
from rdkit.Chem.rdShapeHelpers import ShapeProtrudeDist

FACTORY = BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef"))
Expand All @@ -30,8 +31,7 @@ def get_feature_map_score(
mol_large: Mol,
conf_id_small: int = -1,
conf_id_large: int = -1,
score_mode=FeatMaps.FeatMapScoreMode.Best,
):
) -> float:
"""Calculate the feature map score between two molecules.

Good introduction:
Expand All @@ -44,7 +44,7 @@ def get_feature_map_score(

# feature map based on small molecule
feature_map = FeatMaps.FeatMap(feats=features_small, weights=[1] * len(features_small), params=PARAMETERS)
feature_map.scoreMode = score_mode
feature_map.scoreMode = FeatMaps.FeatMapScoreMode.Best

# score feature in large molecule present in small molecule
feature_map_score = feature_map.ScoreFeats(features_large) / min(feature_map.GetNumFeatures(), len(features_large))
Expand All @@ -55,9 +55,9 @@ def get_feature_map_score(
def get_sucos_score(
mol_reference: Mol,
mol_probe: Mol,
score_mode=FeatMaps.FeatMapScoreMode.Best,
conf_id_reference: int = -1,
conf_id_probe: int = -1,
heavy_only: bool | None = True,
) -> float:
"""Calculate the SuCOS score between a reference ligand and a list of probe ligands.

Expand All @@ -69,15 +69,19 @@ def get_sucos_score(
SuCOS score.

Notes:
SuCOS described in https://chemrxiv.org/engage/chemrxiv/article-details/60c741a99abda23230f8bed5
Adapted from https://github.com/MarcMoesser/SuCOS/blob/master/calc_SuCOS_normalized.py
"""

# explicit or implicit hydrogens should be same for both molecules
mol_reference = handle_hydrogens(mol_reference, heavy_only=heavy_only)
mol_probe = handle_hydrogens(mol_probe, heavy_only=heavy_only)

feature_map_score = get_feature_map_score(
mol_small=mol_reference,
mol_large=mol_probe,
conf_id_small=conf_id_reference,
conf_id_large=conf_id_probe,
score_mode=score_mode,
)
protrusion_distance = ShapeProtrudeDist(
mol1=mol_reference,
Expand All @@ -94,23 +98,23 @@ def get_sucos_score(


def check_sucos(mol_pred: Mol, mol_true: Mol, sucos_threshold: float = 0.4) -> dict[str, dict[str, bool | float]]:
"""Calculate RMSD and related metrics between predicted molecule and closest ground truth molecule.
"""Calculate SuCOS and related metrics between predicted molecule and closest ground truth molecule.

Args:
mol_pred: Predicted molecule (docked ligand) with exactly one conformer.
mol_true: Ground truth molecule (crystal ligand) with at least one conformer. If multiple conformers are
present, the lowest RMSD will be reported.
rmsd_threshold: Threshold in angstrom for reporting whether RMSD is within threshold. Defaults to 2.0.
heavy_only: Whether to only consider heavy atoms for RMSD calculation. Defaults to True.
present, the highest SuCOS will be reported.
sucos_threshold: Threshold in angstrom for reporting whether SuCOS is within threshold. Defaults to 0.4.
heavy_only: Whether to only consider heavy atoms for SuCOS calculation. Defaults to True.

Returns:
PoseBusters results dictionary.
"""
assert isinstance(mol_true, Mol), "Ground truth molecule is missing."
assert isinstance(mol_pred, Mol), "Predicted molecule is missing."
num_conf = mol_true.GetNumConformers()
assert num_conf > 0, "Crystal ligand needs at least one conformer."
assert mol_pred.GetNumConformers() == 1, "Docked ligand should only have one conformer."
assert num_conf > 0, "Ground truth molecule needs at least one conformer."
assert mol_pred.GetNumConformers() == 1, "Predicted molecule should only have one conformer."

try:
SanitizeMol(mol_pred)
Expand All @@ -125,3 +129,12 @@ def check_sucos(mol_pred: Mol, mol_true: Mol, sucos_threshold: float = 0.4) -> d

results = {"sucos": best_sucos, "sucos_within_threshold": sucos_within_threshold}
return {"results": results}


def handle_hydrogens(mol: Mol, heavy_only: bool | None = True) -> Mol:
"""Remove, add or do not modify hydrogens in a molecule."""
if heavy_only is None:
return mol
if heavy_only:
return RemoveHs(mol)
return AddHs(mol, addCoords=True)
4 changes: 2 additions & 2 deletions posebusters/posebusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ def bust(
Returns:
Pandas dataframe with results.
"""
mol_pred = [mol_pred] if isinstance(mol_pred, (Mol, Path, str)) else mol_pred
mol_pred_list: Iterable[Mol | Path | str] = [mol_pred] if isinstance(mol_pred, (Mol, Path, str)) else mol_pred

columns = ["mol_pred", "mol_true", "mol_cond"]
self.file_paths = pd.DataFrame([[mol_pred, mol_true, mol_cond] for mol_pred in mol_pred], columns=columns)
self.file_paths = pd.DataFrame([[mol_pred, mol_true, mol_cond] for mol_pred in mol_pred_list], columns=columns)

results_gen = self._run()

Expand Down
3 changes: 3 additions & 0 deletions posebusters/tools/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def safe_load_mol(path: Path, load_all: bool = False, **load_params) -> Mol | No
Returns:
Molecule object or None if loading failed.
"""
if isinstance(path, Mol):
return path

try:
path = _check_path(path)
with CaptureLogger():
Expand Down
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,13 @@ def mol_cond_7ztl_bcn():
@pytest.fixture
def mol_disconnnected_atoms():
return MolFromMolFile("tests/conftest/mol_disconnected_atoms.sdf", sanitize=True)


@pytest.fixture
def mol_small_7brv_f5r_7wb6_f5r():
return MolFromMolFile("tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_smaller_ligand.sdf", removeHs=True)


@pytest.fixture
def mol_large_7brv_f5r_7wb6_f5r():
return MolFromMolFile("tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_larger_ligand.sdf", removeHs=True)
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
right_molecule_with_hydrogens
RDKit 3D

17 17 0 0 0 0 0 0 0 0999 V2000
57.8140 43.3950 19.4830 N 0 0 0 0 0 0 0 0 0 0 0 0
55.9570 43.8700 15.0390 C 0 0 0 0 0 0 0 0 0 0 0 0
56.2270 43.1270 16.1730 C 0 0 0 0 0 0 0 0 0 0 0 0
56.8350 43.7120 17.2820 C 0 0 0 0 0 0 0 0 0 0 0 0
57.1250 42.9160 18.5000 C 0 0 0 0 0 0 0 0 0 0 0 0
57.1620 45.0630 17.2290 C 0 0 0 0 0 0 0 0 0 0 0 0
56.8990 45.8100 16.0990 C 0 0 0 0 0 0 0 0 0 0 0 0
56.3030 45.2030 15.0180 C 0 0 0 0 0 0 0 0 0 0 0 0
56.6450 41.6610 18.5400 N 0 0 0 0 0 0 0 0 0 0 0 0
55.9500 46.2400 13.4670 Br 0 0 0 0 0 0 0 0 0 0 0 0
58.1060 44.1550 19.3610 H 0 0 0 0 0 0 0 0 0 0 0 0
55.5430 43.4720 14.2930 H 0 0 0 0 0 0 0 0 0 0 0 0
55.9960 42.2150 16.1950 H 0 0 0 0 0 0 0 0 0 0 0 0
57.5680 45.4740 17.9720 H 0 0 0 0 0 0 0 0 0 0 0 0
57.1240 46.7240 16.0670 H 0 0 0 0 0 0 0 0 0 0 0 0
56.1730 41.3460 17.8580 H 0 0 0 0 0 0 0 0 0 0 0 0
56.8010 41.1510 19.2480 H 0 0 0 0 0 0 0 0 0 0 0 0
1 5 2 3
1 11 1 0
2 3 2 0
2 8 1 0
2 12 1 0
3 4 1 0
3 13 1 0
4 5 1 0
4 6 2 0
5 9 1 0
6 7 1 0
6 14 1 0
7 8 2 0
7 15 1 0
8 10 1 0
9 16 1 0
9 17 1 0
M END
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
left_molecule_without_hydrogens
RDKit 3D

10 10 0 0 0 0 0 0 0 0999 V2000
57.7298 43.4513 19.4458 N 0 0 0 0 0 0 0 0 0 0 0 0
56.8728 45.7841 16.0476 C 0 0 0 0 0 0 0 0 0 0 0 0
57.1267 45.0725 17.1867 C 0 0 0 0 0 0 0 0 0 0 0 0
56.8325 43.7291 17.2368 C 0 0 0 0 0 0 0 0 0 0 0 0
57.0936 42.9364 18.4399 C 0 0 0 0 0 0 0 0 0 0 0 0
56.1174 43.1560 16.2069 C 0 0 0 0 0 0 0 0 0 0 0 0
55.8545 43.8496 15.0648 C 0 0 0 0 0 0 0 0 0 0 0 0
56.2497 45.1542 15.0115 C 0 0 0 0 0 0 0 0 0 0 0 0
56.7194 41.6618 18.4761 N 0 0 0 0 0 0 0 0 0 0 0 0
55.9070 46.2021 13.4738 Br 0 0 0 0 0 0 0 0 0 0 0 0
1 5 2 3
2 3 2 0
2 8 1 0
3 4 1 0
4 5 1 0
4 6 2 0
5 9 1 0
6 7 1 0
7 8 2 0
8 10 1 0
M END
19 changes: 18 additions & 1 deletion tests/test_modules/test_sucos.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from __future__ import annotations

import pytest
from rdkit.Chem.rdmolops import AddHs, RemoveHs

from posebusters.modules.sucos import check_sucos


def test_check_sucos(mol_rq3_x00, mol_rq3_x01, mol_rq3_x10):
def test_check_sucos(mol_rq3_x00, mol_rq3_x01, mol_rq3_x10, mol_small_7brv_f5r_7wb6_f5r, mol_large_7brv_f5r_7wb6_f5r):
out = check_sucos(mol_rq3_x00, mol_rq3_x00)
assert out["results"]["sucos"] == pytest.approx(1.0, abs=1e-6)

Expand All @@ -16,6 +17,22 @@ def test_check_sucos(mol_rq3_x00, mol_rq3_x01, mol_rq3_x10):
assert out["results"]["sucos"] == pytest.approx(0.0, abs=1e-6)


def test_check_sucos_hydrogens(mol_small_7brv_f5r_7wb6_f5r, mol_large_7brv_f5r_7wb6_f5r):
out = check_sucos(
AddHs(mol_large_7brv_f5r_7wb6_f5r, addCoords=True), AddHs(mol_small_7brv_f5r_7wb6_f5r, addCoords=True)
)
assert out["results"]["sucos"] <= 1.0

out = check_sucos(RemoveHs(mol_large_7brv_f5r_7wb6_f5r), RemoveHs(mol_small_7brv_f5r_7wb6_f5r))
assert out["results"]["sucos"] <= 1.0

out = check_sucos(AddHs(mol_large_7brv_f5r_7wb6_f5r, addCoords=True), RemoveHs(mol_small_7brv_f5r_7wb6_f5r))
assert out["results"]["sucos"] <= 1.0

out = check_sucos(RemoveHs(mol_small_7brv_f5r_7wb6_f5r), AddHs(mol_large_7brv_f5r_7wb6_f5r, addCoords=True))
assert out["results"]["sucos"] <= 1.0


def test_check_sucos_multiple_ground_truth(mol_one_true_1w1p, mol_true_1w1p):
out = check_sucos(mol_one_true_1w1p, mol_true_1w1p)
assert out["results"]["sucos"] == pytest.approx(1.0, abs=1e-6)
15 changes: 14 additions & 1 deletion tests/test_posebusters.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import math

from rdkit.Chem.rdmolfiles import MolFromSmiles
from rdkit.Chem.rdmolfiles import MolFromMolFile, MolFromPDBFile, MolFromSmiles

from posebusters import PoseBusters

Expand Down Expand Up @@ -107,3 +107,16 @@ def test_bust_gen() -> None:
posebusters = PoseBusters("gen")
result = posebusters.bust(mol_pred=mol_larger, mol_true=mol_smaller, mol_cond=mol_cond_smaller, full_report=True)
assert result["sucos"][0] > 0.2


def test_bust_loaded_mols() -> None:
posebuster = PoseBusters("redock")

mol_pred = MolFromMolFile(mol_pred_1ia1)
mol_true = MolFromMolFile(mol_true_1ia1)
mol_cond = MolFromPDBFile(mol_cond_1ia1)

df = posebuster.bust([mol_pred], mol_true, mol_cond)
assert df["mol_pred_loaded"].all()
assert df["mol_true_loaded"].all()
assert df["mol_cond_loaded"].all()
Loading