Skip to content

Commit

Permalink
Support mol2 files as ground truth (#37)
Browse files Browse the repository at this point in the history
Loading
- Support loading .mol2 files as ground truth (only the first molecule in a .mol2 file is loaded; multiple conformations cannot be loaded from this format)

Requirements
- Add support for Python 3.12 and drop support for Python 3.7

PoseBuster API
- Add `str` to the type hints of the molecule arguments of `PoseBusters.bust`, so that file paths can be passed as plain strings
  • Loading branch information
maabuu authored May 22, 2024
1 parent 3c467ab commit 59427c1
Show file tree
Hide file tree
Showing 9 changed files with 4,888 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
fail-fast: true
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python: ["3.7", "3.8", "3.9", "3.10", "3.11"]
python: ["3.8", "3.9", "3.10", "3.11", "3.12"]

steps:

Expand Down
2 changes: 1 addition & 1 deletion posebusters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
"check_volume_overlap",
]

__version__ = "0.2.12"
__version__ = "0.2.13"
7 changes: 4 additions & 3 deletions posebusters/posebusters.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""PoseBusters class for running all tests on a set of molecules."""

from __future__ import annotations

import inspect
Expand Down Expand Up @@ -73,9 +74,9 @@ def __init__(self, config: str | dict[str, Any] = "redock", top_n: int | None =

def bust(
self,
mol_pred: Iterable[Mol | Path] | Mol | Path,
mol_true: Mol | Path | None = None,
mol_cond: Mol | Path | None = None,
mol_pred: Iterable[Mol | Path | str] | Mol | Path | str,
mol_true: Mol | Path | str | None = None,
mol_cond: Mol | Path | str | None = None,
full_report: bool = False,
) -> pd.DataFrame:
"""Run tests on one or more molecules.
Expand Down
18 changes: 11 additions & 7 deletions posebusters/tools/loading.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Provides functions for loading molecules from files."""

from __future__ import annotations

import logging
Expand Down Expand Up @@ -83,21 +84,24 @@ def _load_mol( # noqa: PLR0913
removeHs=False,
strictParsing=False,
proximityBonding=False,
cleanupSubstructures=False,
cleanupSubstructures=True,
**params,
) -> Mol | None:
"""Load one molecule from a file, picking the right RDKit function."""
"""Load molecule(s) from a file, picking the right RDKit function."""

if load_all and path.suffix == ".sdf":
mol = _load_and_combine_mols(path, sanitize=False, removeHs=removeHs, strictParsing=strictParsing)
elif load_all:
raise ValueError("Can only load multiple conformations from SDF file. Turn off `load_all` option.")
elif path.suffix == ".sdf":
mol = MolFromMolFile(str(path), sanitize=False, removeHs=removeHs, strictParsing=strictParsing)
elif path.suffix == ".mol2":
# MolFromMol2File only loads first molecule from mol2 file
if load_all and sum(ln.strip().startswith("@<TRIPOS>MOLECULE") for ln in open(path).readlines()) > 1:
logger.error("Cannot load multiple molecules from mol2 file, only loading first.")
mol = MolFromMol2File(str(path), sanitize=False, removeHs=removeHs, cleanupSubstructures=cleanupSubstructures)
elif path.suffix == ".pdb":
mol = MolFromPDBFile(str(path), sanitize=False, removeHs=removeHs, proximityBonding=proximityBonding)
elif path.suffix == ".mol":
# .mol files only contain one molecule
block = "".join(open(path).readlines()).strip() + "\nM END"
mol = MolFromMolBlock(block, sanitize=False, removeHs=removeHs, strictParsing=strictParsing)
else:
Expand Down Expand Up @@ -169,15 +173,15 @@ def _assign_bond_order(mol: Mol, smiles) -> Mol:


def _cleanup(mol: Mol) -> Mol:
mol = Cleanup(mol)
Cleanup(mol)
if mol is None:
raise ValueError("Could not cleanup molecule.")
return mol


def _sanitize(mol: Mol) -> Mol:
mol = SanitizeMol(mol)
if mol is None:
flags = SanitizeMol(mol)
if mol is None or flags != 0:
raise ValueError("Could not sanitize molecule.")
return mol

Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ classifiers = [
"Intended Audience :: Science/Research",
"License :: OSI Approved :: BSD License",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Operating System :: OS Independent",
]
requires-python = "~=3.7"
requires-python = "~=3.8"
dynamic = ["version", "description"]
dependencies = ['rdkit >= 2020.09', 'pandas', 'numpy', 'pyyaml']

Expand Down Expand Up @@ -92,7 +92,7 @@ testpaths = "tests"
all = false

[tool.codespell]
skip = '*.pdb,*.sdf,*.po,*.ts'
skip = '*.pdb,*.mol2,*.sdf,*.po,*.ts'
count = ''
quiet-level = 3

Expand Down
4,667 changes: 4,667 additions & 0 deletions tests/conftest/5ze6/5ze6_cond.pdb

Large diffs are not rendered by default.

57 changes: 57 additions & 0 deletions tests/conftest/5ze6/5ze6_pred.sdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@

RDKit 3D

25 25 0 0 0 0 0 0 0 0999 V2000
15.8516 4.4637 47.5561 C 0 0 0 0 0 0 0 0 0 0 0 0
15.0235 4.8589 46.3612 C 0 0 0 0 0 0 0 0 0 0 0 0
15.3624 6.2498 45.8600 C 0 0 0 0 0 0 0 0 0 0 0 0
14.4613 6.6734 44.7345 C 0 0 0 0 0 0 0 0 0 0 0 0
14.8133 8.0532 44.2682 C 0 0 0 0 0 0 0 0 0 0 0 0
13.9720 8.5485 43.1521 C 0 0 0 0 0 0 0 0 0 0 0 0
14.3115 9.9783 42.7008 C 0 0 0 0 0 0 0 0 0 0 0 0
13.4033 10.4360 41.6540 C 0 0 0 0 0 0 0 0 0 0 0 0
13.5218 11.7358 41.0196 C 0 0 0 0 0 0 0 0 0 0 0 0
12.5492 12.2672 40.0600 C 0 0 0 0 0 0 0 0 0 0 0 0
12.8865 13.4570 39.2541 C 0 0 0 0 0 0 0 0 0 0 0 0
12.6706 14.7850 39.8125 C 0 0 0 0 0 0 0 0 0 0 0 0
11.4768 15.6022 39.6312 C 0 0 0 0 0 0 0 0 0 0 0 0
11.3578 16.8773 40.3673 C 0 0 0 0 0 0 0 0 0 0 0 0
10.2121 17.6034 40.1137 O 0 0 0 0 0 0 0 0 0 0 0 0
9.6855 18.7573 40.5765 C 0 0 0 0 0 0 0 0 0 0 0 0
10.2498 19.5965 41.5057 C 0 0 0 0 0 0 0 0 0 0 0 0
9.6407 20.7722 41.9354 C 0 0 0 0 0 0 0 0 0 0 0 0
8.4212 21.1360 41.4273 C 0 0 0 0 0 0 0 0 0 0 0 0
7.8221 20.3197 40.4935 C 0 0 0 0 0 0 0 0 0 0 0 0
6.5713 20.6939 39.9737 O 0 0 0 0 0 0 0 0 0 0 0 0
8.4255 19.1672 40.0755 C 0 0 0 0 0 0 0 0 0 0 0 0
7.7816 18.3280 39.0977 C 0 0 0 0 0 0 0 0 0 0 0 0
8.3805 17.3284 38.6386 O 0 0 0 0 0 0 0 0 0 0 0 0
6.5081 18.6037 38.6521 O 0 0 0 0 0 1 0 0 0 0 0 0
1 2 1 0
2 3 1 0
3 4 1 0
4 5 1 0
5 6 1 0
6 7 1 0
7 8 1 0
8 9 1 0
9 10 1 0
10 11 1 0
11 12 1 0
12 13 1 0
13 14 1 0
14 15 1 0
15 16 1 0
16 17 2 0
17 18 1 0
18 19 2 0
19 20 1 0
20 21 1 0
20 22 2 0
22 23 1 0
23 24 2 0
23 25 1 0
22 16 1 0
M CHG 1 25 -1
M END
$$$$
132 changes: 132 additions & 0 deletions tests/conftest/5ze6/5ze6_true.mol2
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
###
### Created by X-TOOL on Mon Aug 2 16:12:25 2021
###

@<TRIPOS>MOLECULE
5ze6_ligand
58 58 1 0 0
SMALL
GAST_HUCK


@<TRIPOS>ATOM
1 CAA 14.9610 4.1610 49.1090 C.3 1 HJX -0.0653
2 OAB 12.0670 19.6400 41.2690 O.co2 1 HJX -0.5680
3 OAC 10.4280 20.3740 42.4060 O.co2 1 HJX -0.5680
4 OAD 8.0010 19.3610 41.6850 O.3 1 HJX -0.3323
5 CAE 8.5580 15.8990 40.6730 C.ar 1 HJX -0.0764
6 CAF 7.9380 17.0910 41.0010 C.ar 1 HJX -0.0404
7 CAG 9.9340 15.8210 40.6710 C.ar 1 HJX -0.0401
8 CAH 15.1530 4.9390 47.8040 C.3 1 HJX -0.0559
9 CAI 14.4330 6.2900 47.8180 C.3 1 HJX -0.0533
10 CAJ 14.8080 7.0790 46.5770 C.3 1 HJX -0.0531
11 CAK 13.8480 8.2020 46.2370 C.3 1 HJX -0.0531
12 CAL 13.8500 8.3760 44.7250 C.3 1 HJX -0.0531
13 CAM 13.7450 9.8320 44.3000 C.3 1 HJX -0.0531
14 CAN 15.0640 10.5620 44.0440 C.3 1 HJX -0.0531
15 CAO 15.3190 11.6890 45.0440 C.3 1 HJX -0.0531
16 CAP 15.3700 13.0810 44.4400 C.3 1 HJX -0.0531
17 CAQ 14.0420 13.8380 44.4810 C.3 1 HJX -0.0530
18 CAR 14.0540 15.0010 43.4780 C.3 1 HJX -0.0506
19 CAS 12.7120 15.4360 42.8760 C.3 1 HJX -0.0249
20 CAT 12.7560 15.5960 41.3510 C.3 1 HJX 0.0712
21 OAU 12.1170 16.7980 40.9750 O.3 1 HJX -0.3172
22 CAV 10.9030 19.4390 41.7020 C.2 1 HJX 0.0532
23 CAW 8.6980 18.1990 41.3430 C.ar 1 HJX 0.0946
24 CAX 10.7130 16.9270 41.0130 C.ar 1 HJX 0.0981
25 CAY 10.0980 18.1550 41.3470 C.ar 1 HJX 0.0664
26 H1 15.4964 3.2021 49.0464 H 1 HJX 0.0230
27 H2 13.8892 3.9727 49.2693 H 1 HJX 0.0230
28 H3 15.3596 4.7493 49.9487 H 1 HJX 0.0230
29 H4 8.6202 20.0490 41.8990 H 1 HJX 0.2496
30 H5 7.9636 15.0290 40.4183 H 1 HJX 0.0584
31 H6 6.8562 17.1591 40.9907 H 1 HJX 0.0458
32 H7 10.4170 14.8890 40.4004 H 1 HJX 0.0459
33 H8 14.7579 4.3369 46.9725 H 1 HJX 0.0263
34 H9 16.2283 5.1135 47.6518 H 1 HJX 0.0263
35 H10 14.7303 6.8542 48.7143 H 1 HJX 0.0265
36 H11 13.3455 6.1253 47.8324 H 1 HJX 0.0265
37 H12 14.8414 6.3847 45.7245 H 1 HJX 0.0265
38 H13 15.8057 7.5147 46.7344 H 1 HJX 0.0265
39 H14 14.1760 9.1337 46.7210 H 1 HJX 0.0265
40 H15 12.8356 7.9460 46.5825 H 1 HJX 0.0265
41 H16 12.9948 7.8251 44.3066 H 1 HJX 0.0265
42 H17 14.7860 7.9595 44.3244 H 1 HJX 0.0265
43 H18 13.2107 10.3747 45.0937 H 1 HJX 0.0265
44 H19 13.1568 9.8680 43.3712 H 1 HJX 0.0265
45 H20 15.0384 10.9898 43.0309 H 1 HJX 0.0265
46 H21 15.8873 9.8360 44.1148 H 1 HJX 0.0265
47 H22 16.2825 11.4956 45.5382 H 1 HJX 0.0265
48 H23 14.5129 11.6707 45.7922 H 1 HJX 0.0265
49 H24 15.6806 12.9885 43.3888 H 1 HJX 0.0265
50 H25 16.1177 13.6685 44.9929 H 1 HJX 0.0265
51 H26 13.8832 14.2356 45.4942 H 1 HJX 0.0265
52 H27 13.2242 13.1482 44.2254 H 1 HJX 0.0265
53 H28 14.7091 14.7091 42.6440 H 1 HJX 0.0266
54 H29 14.4820 15.8742 43.9922 H 1 HJX 0.0266
55 H30 12.4260 16.4007 43.3204 H 1 HJX 0.0291
56 H31 11.9549 14.6783 43.1261 H 1 HJX 0.0291
57 H32 12.2403 14.7461 40.8801 H 1 HJX 0.0616
58 H33 13.8037 15.6200 41.0169 H 1 HJX 0.0616
@<TRIPOS>BOND
1 8 1 1
2 22 2 ar
3 22 3 ar
4 23 4 1
5 6 5 ar
6 5 7 ar
7 23 6 ar
8 7 24 ar
9 9 8 1
10 10 9 1
11 11 10 1
12 12 11 1
13 13 12 1
14 14 13 1
15 15 14 1
16 16 15 1
17 17 16 1
18 18 17 1
19 19 18 1
20 20 19 1
21 21 20 1
22 24 21 1
23 25 22 1
24 23 25 ar
25 24 25 ar
26 1 26 1
27 1 27 1
28 1 28 1
29 4 29 1
30 5 30 1
31 6 31 1
32 7 32 1
33 8 33 1
34 8 34 1
35 9 35 1
36 9 36 1
37 10 37 1
38 10 38 1
39 11 39 1
40 11 40 1
41 12 41 1
42 12 42 1
43 13 43 1
44 13 44 1
45 14 45 1
46 14 46 1
47 15 47 1
48 15 48 1
49 16 49 1
50 16 50 1
51 17 51 1
52 17 52 1
53 18 53 1
54 18 54 1
55 19 55 1
56 19 56 1
57 20 57 1
58 20 58 1
@<TRIPOS>SUBSTRUCTURE
1 HJX 1

12 changes: 12 additions & 0 deletions tests/test_posebusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
mol_larger = "tests/conftest/2HA2_SCK_2HA3_CHT/2HA2_SCK_2HA3_CHT_larger_ligand.sdf"
mol_cond_smaller = "tests/conftest/2HA2_SCK_2HA3_CHT/2HA2_SCK_2HA3_CHT_smaller_receptor.pdb"

mol_true_5ze6 = "tests/conftest/5ze6/5ze6_true.mol2"
mol_pred_5ze6 = "tests/conftest/5ze6/5ze6_pred.sdf"
mol_cond_5ze6 = "tests/conftest/5ze6/5ze6_cond.pdb"


def test_bust_redocks_1ia1() -> None:
posebusters = PoseBusters("redock")
Expand All @@ -40,6 +44,14 @@ def test_bust_redocks_1w1p() -> None:
assert df.all(axis=1).values[0]


def test_bust_redocks_5ze6() -> None:
# check that mol2 files as true molecule can be loaded

posebusters = PoseBusters("redock")
df = posebusters.bust([mol_pred_5ze6], mol_true_5ze6, mol_cond_5ze6)
assert df["mol_true_loaded"].all()


def test_bust_docks() -> None:
posebusters = PoseBusters("dock")
df = posebusters.bust([mol_pred_1ia1], mol_cond=mol_cond_1ia1)
Expand Down

0 comments on commit 59427c1

Please sign in to comment.