Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support mol2 files as ground truth #37

Merged
merged 10 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
fail-fast: true
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python: ["3.7", "3.8", "3.9", "3.10", "3.11"]
python: ["3.8", "3.9", "3.10", "3.11", "3.12"]

steps:

Expand Down
2 changes: 1 addition & 1 deletion posebusters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
"check_volume_overlap",
]

__version__ = "0.2.12"
__version__ = "0.2.13"
7 changes: 4 additions & 3 deletions posebusters/posebusters.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""PoseBusters class for running all tests on a set of molecules."""

from __future__ import annotations

import inspect
Expand Down Expand Up @@ -73,9 +74,9 @@ def __init__(self, config: str | dict[str, Any] = "redock", top_n: int | None =

def bust(
self,
mol_pred: Iterable[Mol | Path] | Mol | Path,
mol_true: Mol | Path | None = None,
mol_cond: Mol | Path | None = None,
mol_pred: Iterable[Mol | Path | str] | Mol | Path | str,
mol_true: Mol | Path | str | None = None,
mol_cond: Mol | Path | str | None = None,
full_report: bool = False,
) -> pd.DataFrame:
"""Run tests on one or more molecules.
Expand Down
18 changes: 11 additions & 7 deletions posebusters/tools/loading.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Provides functions for loading molecules from files."""

from __future__ import annotations

import logging
Expand Down Expand Up @@ -83,21 +84,24 @@ def _load_mol( # noqa: PLR0913
removeHs=False,
strictParsing=False,
proximityBonding=False,
cleanupSubstructures=False,
cleanupSubstructures=True,
**params,
) -> Mol | None:
"""Load one molecule from a file, picking the right RDKit function."""
"""Load molecule(s) from a file, picking the right RDKit function."""

if load_all and path.suffix == ".sdf":
mol = _load_and_combine_mols(path, sanitize=False, removeHs=removeHs, strictParsing=strictParsing)
elif load_all:
raise ValueError("Can only load multiple conformations from SDF file. Turn off `load_all` option.")
elif path.suffix == ".sdf":
mol = MolFromMolFile(str(path), sanitize=False, removeHs=removeHs, strictParsing=strictParsing)
elif path.suffix == ".mol2":
# MolFromMol2File only loads first molecule from mol2 file
if load_all and sum(ln.strip().startswith("@<TRIPOS>MOLECULE") for ln in open(path).readlines()) > 1:
logger.error("Cannot load multiple molecules from mol2 file, only loading first.")
mol = MolFromMol2File(str(path), sanitize=False, removeHs=removeHs, cleanupSubstructures=cleanupSubstructures)
elif path.suffix == ".pdb":
mol = MolFromPDBFile(str(path), sanitize=False, removeHs=removeHs, proximityBonding=proximityBonding)
elif path.suffix == ".mol":
# .mol files only contain one molecule
block = "".join(open(path).readlines()).strip() + "\nM END"
mol = MolFromMolBlock(block, sanitize=False, removeHs=removeHs, strictParsing=strictParsing)
else:
Expand Down Expand Up @@ -169,15 +173,15 @@ def _assign_bond_order(mol: Mol, smiles) -> Mol:


def _cleanup(mol: Mol) -> Mol:
    """Run RDKit's ``Cleanup`` on ``mol`` and return the same molecule.

    Args:
        mol: Molecule to clean up.

    Returns:
        The molecule object that was passed in.

    Raises:
        ValueError: If ``mol`` is ``None``.
    """
    # Reject a missing molecule before handing it to RDKit, so the caller
    # gets the documented ValueError rather than a binding-level error.
    if mol is None:
        raise ValueError("Could not cleanup molecule.")
    # The return value is deliberately not assigned back to ``mol``.
    # NOTE(review): presumably ``Cleanup`` modifies the molecule in place and
    # returns nothing -- confirm against the RDKit import used by this module.
    Cleanup(mol)
    return mol


def _sanitize(mol: Mol) -> Mol:
    """Sanitize ``mol`` with RDKit's ``SanitizeMol`` and return it.

    Args:
        mol: Molecule to sanitize.

    Returns:
        The molecule object that was passed in.

    Raises:
        ValueError: If ``mol`` is ``None`` or ``SanitizeMol`` returns a
            non-zero status.
    """
    # Check for a missing molecule first so RDKit is never called with None.
    if mol is None:
        raise ValueError("Could not sanitize molecule.")
    # The return value is a status, not a molecule, so it must not be
    # assigned back to ``mol``.
    # NOTE(review): presumably zero means every sanitization step succeeded --
    # confirm against the RDKit documentation.
    flags = SanitizeMol(mol)
    if flags != 0:
        raise ValueError("Could not sanitize molecule.")
    return mol

Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ classifiers = [
"Intended Audience :: Science/Research",
"License :: OSI Approved :: BSD License",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Operating System :: OS Independent",
]
requires-python = "~=3.7"
requires-python = "~=3.8"
dynamic = ["version", "description"]
dependencies = ['rdkit >= 2020.09', 'pandas', 'numpy', 'pyyaml']

Expand Down Expand Up @@ -92,7 +92,7 @@ testpaths = "tests"
all = false

[tool.codespell]
skip = '*.pdb,*.sdf,*.po,*.ts'
skip = '*.pdb,*.mol2,*.sdf,*.po,*.ts'
count = ''
quiet-level = 3

Expand Down
4,667 changes: 4,667 additions & 0 deletions tests/conftest/5ze6/5ze6_cond.pdb

Large diffs are not rendered by default.

57 changes: 57 additions & 0 deletions tests/conftest/5ze6/5ze6_pred.sdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@

RDKit 3D

25 25 0 0 0 0 0 0 0 0999 V2000
15.8516 4.4637 47.5561 C 0 0 0 0 0 0 0 0 0 0 0 0
15.0235 4.8589 46.3612 C 0 0 0 0 0 0 0 0 0 0 0 0
15.3624 6.2498 45.8600 C 0 0 0 0 0 0 0 0 0 0 0 0
14.4613 6.6734 44.7345 C 0 0 0 0 0 0 0 0 0 0 0 0
14.8133 8.0532 44.2682 C 0 0 0 0 0 0 0 0 0 0 0 0
13.9720 8.5485 43.1521 C 0 0 0 0 0 0 0 0 0 0 0 0
14.3115 9.9783 42.7008 C 0 0 0 0 0 0 0 0 0 0 0 0
13.4033 10.4360 41.6540 C 0 0 0 0 0 0 0 0 0 0 0 0
13.5218 11.7358 41.0196 C 0 0 0 0 0 0 0 0 0 0 0 0
12.5492 12.2672 40.0600 C 0 0 0 0 0 0 0 0 0 0 0 0
12.8865 13.4570 39.2541 C 0 0 0 0 0 0 0 0 0 0 0 0
12.6706 14.7850 39.8125 C 0 0 0 0 0 0 0 0 0 0 0 0
11.4768 15.6022 39.6312 C 0 0 0 0 0 0 0 0 0 0 0 0
11.3578 16.8773 40.3673 C 0 0 0 0 0 0 0 0 0 0 0 0
10.2121 17.6034 40.1137 O 0 0 0 0 0 0 0 0 0 0 0 0
9.6855 18.7573 40.5765 C 0 0 0 0 0 0 0 0 0 0 0 0
10.2498 19.5965 41.5057 C 0 0 0 0 0 0 0 0 0 0 0 0
9.6407 20.7722 41.9354 C 0 0 0 0 0 0 0 0 0 0 0 0
8.4212 21.1360 41.4273 C 0 0 0 0 0 0 0 0 0 0 0 0
7.8221 20.3197 40.4935 C 0 0 0 0 0 0 0 0 0 0 0 0
6.5713 20.6939 39.9737 O 0 0 0 0 0 0 0 0 0 0 0 0
8.4255 19.1672 40.0755 C 0 0 0 0 0 0 0 0 0 0 0 0
7.7816 18.3280 39.0977 C 0 0 0 0 0 0 0 0 0 0 0 0
8.3805 17.3284 38.6386 O 0 0 0 0 0 0 0 0 0 0 0 0
6.5081 18.6037 38.6521 O 0 0 0 0 0 1 0 0 0 0 0 0
1 2 1 0
2 3 1 0
3 4 1 0
4 5 1 0
5 6 1 0
6 7 1 0
7 8 1 0
8 9 1 0
9 10 1 0
10 11 1 0
11 12 1 0
12 13 1 0
13 14 1 0
14 15 1 0
15 16 1 0
16 17 2 0
17 18 1 0
18 19 2 0
19 20 1 0
20 21 1 0
20 22 2 0
22 23 1 0
23 24 2 0
23 25 1 0
22 16 1 0
M CHG 1 25 -1
M END
$$$$
132 changes: 132 additions & 0 deletions tests/conftest/5ze6/5ze6_true.mol2
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
###
### Created by X-TOOL on Mon Aug 2 16:12:25 2021
###

@<TRIPOS>MOLECULE
5ze6_ligand
58 58 1 0 0
SMALL
GAST_HUCK


@<TRIPOS>ATOM
1 CAA 14.9610 4.1610 49.1090 C.3 1 HJX -0.0653
2 OAB 12.0670 19.6400 41.2690 O.co2 1 HJX -0.5680
3 OAC 10.4280 20.3740 42.4060 O.co2 1 HJX -0.5680
4 OAD 8.0010 19.3610 41.6850 O.3 1 HJX -0.3323
5 CAE 8.5580 15.8990 40.6730 C.ar 1 HJX -0.0764
6 CAF 7.9380 17.0910 41.0010 C.ar 1 HJX -0.0404
7 CAG 9.9340 15.8210 40.6710 C.ar 1 HJX -0.0401
8 CAH 15.1530 4.9390 47.8040 C.3 1 HJX -0.0559
9 CAI 14.4330 6.2900 47.8180 C.3 1 HJX -0.0533
10 CAJ 14.8080 7.0790 46.5770 C.3 1 HJX -0.0531
11 CAK 13.8480 8.2020 46.2370 C.3 1 HJX -0.0531
12 CAL 13.8500 8.3760 44.7250 C.3 1 HJX -0.0531
13 CAM 13.7450 9.8320 44.3000 C.3 1 HJX -0.0531
14 CAN 15.0640 10.5620 44.0440 C.3 1 HJX -0.0531
15 CAO 15.3190 11.6890 45.0440 C.3 1 HJX -0.0531
16 CAP 15.3700 13.0810 44.4400 C.3 1 HJX -0.0531
17 CAQ 14.0420 13.8380 44.4810 C.3 1 HJX -0.0530
18 CAR 14.0540 15.0010 43.4780 C.3 1 HJX -0.0506
19 CAS 12.7120 15.4360 42.8760 C.3 1 HJX -0.0249
20 CAT 12.7560 15.5960 41.3510 C.3 1 HJX 0.0712
21 OAU 12.1170 16.7980 40.9750 O.3 1 HJX -0.3172
22 CAV 10.9030 19.4390 41.7020 C.2 1 HJX 0.0532
23 CAW 8.6980 18.1990 41.3430 C.ar 1 HJX 0.0946
24 CAX 10.7130 16.9270 41.0130 C.ar 1 HJX 0.0981
25 CAY 10.0980 18.1550 41.3470 C.ar 1 HJX 0.0664
26 H1 15.4964 3.2021 49.0464 H 1 HJX 0.0230
27 H2 13.8892 3.9727 49.2693 H 1 HJX 0.0230
28 H3 15.3596 4.7493 49.9487 H 1 HJX 0.0230
29 H4 8.6202 20.0490 41.8990 H 1 HJX 0.2496
30 H5 7.9636 15.0290 40.4183 H 1 HJX 0.0584
31 H6 6.8562 17.1591 40.9907 H 1 HJX 0.0458
32 H7 10.4170 14.8890 40.4004 H 1 HJX 0.0459
33 H8 14.7579 4.3369 46.9725 H 1 HJX 0.0263
34 H9 16.2283 5.1135 47.6518 H 1 HJX 0.0263
35 H10 14.7303 6.8542 48.7143 H 1 HJX 0.0265
36 H11 13.3455 6.1253 47.8324 H 1 HJX 0.0265
37 H12 14.8414 6.3847 45.7245 H 1 HJX 0.0265
38 H13 15.8057 7.5147 46.7344 H 1 HJX 0.0265
39 H14 14.1760 9.1337 46.7210 H 1 HJX 0.0265
40 H15 12.8356 7.9460 46.5825 H 1 HJX 0.0265
41 H16 12.9948 7.8251 44.3066 H 1 HJX 0.0265
42 H17 14.7860 7.9595 44.3244 H 1 HJX 0.0265
43 H18 13.2107 10.3747 45.0937 H 1 HJX 0.0265
44 H19 13.1568 9.8680 43.3712 H 1 HJX 0.0265
45 H20 15.0384 10.9898 43.0309 H 1 HJX 0.0265
46 H21 15.8873 9.8360 44.1148 H 1 HJX 0.0265
47 H22 16.2825 11.4956 45.5382 H 1 HJX 0.0265
48 H23 14.5129 11.6707 45.7922 H 1 HJX 0.0265
49 H24 15.6806 12.9885 43.3888 H 1 HJX 0.0265
50 H25 16.1177 13.6685 44.9929 H 1 HJX 0.0265
51 H26 13.8832 14.2356 45.4942 H 1 HJX 0.0265
52 H27 13.2242 13.1482 44.2254 H 1 HJX 0.0265
53 H28 14.7091 14.7091 42.6440 H 1 HJX 0.0266
54 H29 14.4820 15.8742 43.9922 H 1 HJX 0.0266
55 H30 12.4260 16.4007 43.3204 H 1 HJX 0.0291
56 H31 11.9549 14.6783 43.1261 H 1 HJX 0.0291
57 H32 12.2403 14.7461 40.8801 H 1 HJX 0.0616
58 H33 13.8037 15.6200 41.0169 H 1 HJX 0.0616
@<TRIPOS>BOND
1 8 1 1
2 22 2 ar
3 22 3 ar
4 23 4 1
5 6 5 ar
6 5 7 ar
7 23 6 ar
8 7 24 ar
9 9 8 1
10 10 9 1
11 11 10 1
12 12 11 1
13 13 12 1
14 14 13 1
15 15 14 1
16 16 15 1
17 17 16 1
18 18 17 1
19 19 18 1
20 20 19 1
21 21 20 1
22 24 21 1
23 25 22 1
24 23 25 ar
25 24 25 ar
26 1 26 1
27 1 27 1
28 1 28 1
29 4 29 1
30 5 30 1
31 6 31 1
32 7 32 1
33 8 33 1
34 8 34 1
35 9 35 1
36 9 36 1
37 10 37 1
38 10 38 1
39 11 39 1
40 11 40 1
41 12 41 1
42 12 42 1
43 13 43 1
44 13 44 1
45 14 45 1
46 14 46 1
47 15 47 1
48 15 48 1
49 16 49 1
50 16 50 1
51 17 51 1
52 17 52 1
53 18 53 1
54 18 54 1
55 19 55 1
56 19 56 1
57 20 57 1
58 20 58 1
@<TRIPOS>SUBSTRUCTURE
1 HJX 1

12 changes: 12 additions & 0 deletions tests/test_posebusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
mol_larger = "tests/conftest/2HA2_SCK_2HA3_CHT/2HA2_SCK_2HA3_CHT_larger_ligand.sdf"
mol_cond_smaller = "tests/conftest/2HA2_SCK_2HA3_CHT/2HA2_SCK_2HA3_CHT_smaller_receptor.pdb"

mol_true_5ze6 = "tests/conftest/5ze6/5ze6_true.mol2"
mol_pred_5ze6 = "tests/conftest/5ze6/5ze6_pred.sdf"
mol_cond_5ze6 = "tests/conftest/5ze6/5ze6_cond.pdb"


def test_bust_redocks_1ia1() -> None:
posebusters = PoseBusters("redock")
Expand All @@ -40,6 +44,14 @@ def test_bust_redocks_1w1p() -> None:
assert df.all(axis=1).values[0]


def test_bust_redocks_5ze6() -> None:
    """Check that a mol2 file can be loaded as the true molecule in redock mode."""
    buster = PoseBusters("redock")
    report = buster.bust([mol_pred_5ze6], mol_true_5ze6, mol_cond_5ze6)
    # Every row of the report must flag the true molecule as loaded.
    assert report["mol_true_loaded"].all()


def test_bust_docks() -> None:
posebusters = PoseBusters("dock")
df = posebusters.bust([mol_pred_1ia1], mol_cond=mol_cond_1ia1)
Expand Down
Loading