Skip to content

Commit

Permalink
Merge pull request #690 from haddocking/alascan2.0
Browse files Browse the repository at this point in the history
alascan module
  • Loading branch information
rvhonorato authored Oct 31, 2023
2 parents 5d84af1 + 7d5ae9a commit 7d2dfff
Show file tree
Hide file tree
Showing 16 changed files with 1,412 additions and 19 deletions.
2 changes: 2 additions & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ignore:
- "tests"
37 changes: 37 additions & 0 deletions examples/analysis/alascan-test.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# ==================================================
# Alanine Scan with HADDOCK3
#
# This example workflow will refine a complex, producing 10
# refined models, cluster them, and then perform an
# alanine scan across the whole interface.
# ==================================================

# General parameters
run_dir = "run1-alanine-scan"
ncores = 10

# Input
molecules = ["../docking-protein-protein/data/e2a-hpr_1GGR.pdb"]

# Workflow definition
# ====================================================================
[topoaa]
autohis = true

[mdref]
# this will produce 10 refined models
sampling_factor = 10

[caprieval]
reference_fname="../docking-protein-protein/data/e2a-hpr_1GGR.pdb"

[rmsdmatrix]

[clustrmsd]
tolerance=2

[alascan]
scan_residue="ALA"
output=true
plot=true
int_cutoff = 3.0
73 changes: 73 additions & 0 deletions integration_tests/test_alascan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import tempfile
from pathlib import Path

import pytest
import shutil
import pandas as pd
import numpy as np

from haddock.modules.analysis.alascan import DEFAULT_CONFIG as DEFAULT_ALASCAN_CONFIG
from haddock.modules.analysis.alascan import HaddockModule as AlascanModule
from haddock.libs.libontology import PDBFile
from . import CNS_EXEC, DATA_DIR, has_cns
from tests import golden_data

@pytest.fixture
def alascan_module():
    """Yield an alascan module with default params and a 3.5 A interface cutoff."""
    # NOTE(review): the temporary directory is created for cleanup scoping but
    # its path is never used — the module runs with path="." ; confirm intent.
    with tempfile.TemporaryDirectory(dir=".") as _scratch:
        module = AlascanModule(
            order=0,
            path=".",
            initial_params=DEFAULT_ALASCAN_CONFIG,
        )
        module.params["int_cutoff"] = 3.5
        yield module

class MockPreviousIO:
    """Stand-in for the module's previous_io, serving two golden complexes."""

    def __init__(self, path):
        # Stored for API compatibility; retrieve_models copies into the cwd.
        self.path = path

    def retrieve_models(self, individualize: bool = False):
        """Copy the golden PDB complexes into the cwd and return them as models."""
        file_names = ("protprot_complex_1.pdb", "protprot_complex_2.pdb")
        for file_name in file_names:
            shutil.copy(Path(golden_data, file_name), Path(".", file_name))
        return [PDBFile(file_name=name, path=".") for name in file_names]

    def output(self):
        """No-op: the mock produces no module output file."""
        return None

@has_cns
def test_alascan_default(alascan_module, mocker):
    """Run the alascan module end-to-end and check its per-model and cluster CSVs.

    Uses MockPreviousIO to feed two golden protein-protein complexes, then
    verifies the expected output files exist, have the right shape, and that
    two reference delta_score values fall within tolerance.
    """
    alascan_module.previous_io = MockPreviousIO(path=alascan_module.path)
    alascan_module.run()

    expected_csv1 = Path(alascan_module.path, "scan_protprot_complex_1.csv")
    expected_csv2 = Path(alascan_module.path, "scan_protprot_complex_2.csv")
    expected_clt_csv = Path(alascan_module.path, "scan_clt_-.csv")

    assert expected_csv1.exists(), f"{expected_csv1} does not exist"
    assert expected_csv2.exists(), f"{expected_csv2} does not exist"
    assert expected_clt_csv.exists(), f"{expected_clt_csv} does not exist"

    # per-model scan table: 10 scanned residues x 16 columns
    df = pd.read_csv(expected_csv1, sep="\t", comment="#")
    assert df.shape == (10, 16), f"{expected_csv1} has wrong shape"
    # first ARG (ARG 17 B) should have a delta_score approximately equal to 28.53
    # select the scalar explicitly instead of asserting on a Series, which
    # relies on ambiguous 1-element array truthiness and hides match errors
    arg_delta = df.loc[df["ori_resname"] == "ARG"].iloc[0]["delta_score"]
    assert np.isclose(arg_delta, 28.53, atol=10)

    # cluster-level table: 18 residues x 11 columns
    df_clt = pd.read_csv(expected_clt_csv, sep="\t", comment="#")
    assert df_clt.shape == (18, 11), f"{expected_clt_csv} has wrong shape"
    # average delta score of A-38-ASP should be around 8.18
    asp_rows = df_clt.loc[df_clt["full_resname"] == "A-38-ASP"]
    assert np.isclose(asp_rows["delta_score"].iloc[0], 8.18, atol=2)

32 changes: 20 additions & 12 deletions src/haddock/clis/cli_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@

ap.add_argument("pdb_file", help="Input PDB file")

ap.add_argument(
"--run_dir",
default="haddock-score-client",
type=str,
required=False,
help="Run directory name.",
)

ap.add_argument(
"--full",
action="store_true",
Expand Down Expand Up @@ -98,13 +106,14 @@ def maincli() -> None:


def main(
pdb_file: FilePath,
full: bool = False,
outputpdb: bool = False,
outputpsf: bool = False,
keep_all: bool = False,
**kwargs: Any,
) -> None:
pdb_file: FilePath,
run_dir: FilePath,
full: bool = False,
outputpdb: bool = False,
outputpsf: bool = False,
keep_all: bool = False,
**kwargs: Any,
) -> None:
"""
Calculate the score of a complex using the ``emscoring`` module.
Expand Down Expand Up @@ -180,7 +189,7 @@ def main(

print("> starting calculations...")

run_dir = Path("haddock-score-client")
run_dir = Path(run_dir)
with suppress(FileNotFoundError):
shutil.rmtree(run_dir)
run_dir.mkdir()
Expand Down Expand Up @@ -241,10 +250,9 @@ def main(
shutil.rmtree(run_dir)
else:
print(
"The folder where the calculations where performed was kept. See "
"folder: haddock-scoring-client"
)

'The folder where the calculations where performed was kept. See '
f'folder: {run_dir}'
)

if __name__ == "__main__":
sys.exit(maincli()) # type: ignore
18 changes: 14 additions & 4 deletions src/haddock/libs/libalign.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,13 @@ def centroid(X: NDFloat) -> NDFloat:


def load_coords(
pdb_f, atoms, filter_resdic=None, numbering_dic=None, model2ref_chain_dict=None
):
pdb_f,
atoms,
filter_resdic=None,
numbering_dic=None,
model2ref_chain_dict=None,
add_resname=None,
):
"""
Load coordinates from PDB.
Expand All @@ -420,6 +425,9 @@ def load_coords(
numbering_dic : dict
dict of numbering dictionaries (one dictionary per chain)
add_resname : bool
use the residue name in the identifier
Returns
-------
coord_dic : dict
Expand Down Expand Up @@ -460,8 +468,10 @@ def load_coords(
# " was not matched!"
# )
continue
# identifier = f"{chain}.{resnum}.{atom_name}"
identifier = (chain, resnum, atom_name)
if add_resname is True:
identifier = (chain, resnum, atom_name, resname)
else:
identifier = (chain, resnum, atom_name)
if atom_name not in atoms[resname]:
continue
if chain not in chain_dic:
Expand Down
30 changes: 30 additions & 0 deletions src/haddock/libs/libparallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,36 @@ def split_tasks(lst: Sequence[AnyT],
yield chunk


def get_index_list(nmodels, ncores):
    """
    Optimal distribution of models among cores.

    Parameters
    ----------
    nmodels : int
        Number of models to be distributed.
    ncores : int
        Number of cores to be used.

    Returns
    -------
    index_list : list
        List of model indexes to be used for the parallel scanning.
    """
    # every core gets at least `base` models; the first `extra` cores get one more
    base, extra = divmod(nmodels, ncores)
    boundaries = [0]
    for core in range(ncores):
        chunk = base + 1 if core < extra else base
        boundaries.append(boundaries[-1] + chunk)
    return boundaries


class Worker(Process):
"""Work on tasks."""

Expand Down
83 changes: 83 additions & 0 deletions src/haddock/libs/libplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,3 +993,86 @@ def report_generator(boxes, scatters, tables, step):
html_report = _generate_html_report(step, figures)
with open("report.html", "w", encoding="utf-8") as report:
report.write(html_report)


def make_alascan_plot(df, clt_id, scan_res="ALA"):
    """
    Make a plotly interactive plot.

    Score components are here **weighted** by their respective
    contribution to the total score.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing the results of the alanine scan.
    clt_id : int
        Cluster ID.
    scan_res : str, optional
        Residue name used for the scan, by default "ALA"
    """
    plot_name = f"scan_clt_{clt_id}"
    log.info(f"Generating {scan_res} scanning plot {plot_name}")

    # create figure
    fig = go.Figure(layout={"width": 2000, "height": 1000})

    # one grouped bar per score component; delta_elec carries the weight it is
    # given in the emscoring module (0.2), the others are unweighted
    components = (
        ("delta_score", 1.0),
        ("delta_vdw", 1.0),
        ("delta_elec", 0.2),
        ("delta_desolv", 1.0),
    )
    for component, weight in components:
        fig.add_trace(
            go.Bar(
                x=df["full_resname"],
                y=weight * df[component],
                name=component,
            )
        )

    # prettifying layout
    fig.update_layout(
        title=f"{scan_res} scanning cluster {clt_id}",
        xaxis=dict(
            title="Residue Name",
            tickfont_size=14,
            titlefont_size=16,
            tick0=df["full_resname"],
            # in case we want to show less residues
            # dtick=10,
        ),
        yaxis=dict(
            # fixed typo in the axis label ("Weigted" -> "Weighted")
            title="Weighted delta",
            titlefont_size=16,
            tickfont_size=14,
        ),
        legend=dict(x=1.01, y=1.0, font_family="Helvetica", font_size=16),
        barmode="group",
        bargap=0.05,
        bargroupgap=0.05,
        hovermode="x unified",
        hoverlabel=dict(font_size=16, font_family="Helvetica"),
    )
    # light separators between residue groups
    for n in range(df.shape[0] - 1):
        fig.add_vline(x=0.5 + n, line_color="gray", opacity=0.2)
    # save html
    html_output_filename = f"{plot_name}.html"
    fig.write_html(html_output_filename)
2 changes: 1 addition & 1 deletion src/haddock/modules/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Iterable


modules_using_resdic = ("caprieval", "rmsdmatrix")
modules_using_resdic = ("caprieval", "rmsdmatrix", "alascan")


def confirm_resdic_chainid_length(params: Iterable[str]) -> None:
Expand Down
Loading

0 comments on commit 7d2dfff

Please sign in to comment.