Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[attract] module #1034

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions src/haddock/modules/_template_cat/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,25 @@ To develop your own HADDOCK3 module follow the patterns used in other modules.

1. If your module belongs to a new category, create a folder for that category
under the `modules` folder.
1. Else, create a folder for your module inside its relevant category.
2. Else, create a folder for your module inside its relevant category.
rvhonorato marked this conversation as resolved.
Show resolved Hide resolved
The name of that folder is the name of the module, i.e., the name used to call
the module in the haddock3 configuration files and used throughout the code base.
1. Copy the `__init__.py` file here to the new module's folder and edit it accordingly
3. Copy the `__init__.py` file here to the new module's folder and edit it accordingly
to the instructions there.
1. Do the same for the `defaults.yaml` file.
1. You can then add any extra files needed inside your module's folder in order to
4. Do the same for the `defaults.yaml` file.
5. You can then add any extra files needed inside your module's folder in order to
develop your module fully.
1. If your module requires any extra libraries, describe how to install those libraries
6. If your module requires any extra libraries, describe how to install those libraries
in the `docs/INSTALL.md` file. Unless approved by the Haddock Team, do not add
those dependencies to the `requirements.*` files.
1. HADDOCK3 has already many features related to IO, subprocess run, sending jobs,
7. HADDOCK3 has already many features related to IO, subprocess run, sending jobs,
etc. Please, look around the `libs` folder for pertinent functions, but, above all,
feel welcomed to reach out to us with any doubts.
1. Please write also tests for your module. Our testing machinery already
8. Please write also tests for your module. Our testing machinery already
tests for the common patterns, for example, inspecting the `defaults.yaml` file.
But you should write any additional tests to ensure that your module works properly.
See other examples in the `tests/` folder.
1. Finally, add an example of how to use your module in the `examples/` folder.
9. Finally, add an example of how to use your module in the `examples/` folder.
The example should have a short sampling scheme. Name the config file ending with
`-test.cfg`.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def __init__(
) -> None:

# if your module uses CNS you might need to define where the main CNS
# script is localted. See examples in `topoaa`, `emref`.
# script is located. See examples in `topoaa`, `emref`.
# else leave it out.
# cns_script = Path(RECIPE_PATH, "cns", "main.cns")

Expand Down
154 changes: 154 additions & 0 deletions src/haddock/modules/sampling/attract/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""attract docking module
==================================

This module performs a fragment-based single-stranded (ss) RNA-protein docking
using ATTRACT docking engine. This docking approach was developed to tackle the
flexibility of ssRNA. Its core idea is to split ssRNA chain into overlapping
trinucleotides (fragments), and dock them onto the rigid receptor separately,
assembling the fragments back into the whole chain models afterwards.

#todo
add short description of the protocol, including CG, NAlib, sampling,
scoring (two ways) and assembly + possible restraints.
."""
import os
import sys
import subprocess
import shutil
import shlex
from pathlib import Path
from haddock.modules.sampling.attract.attractmodule import (
rename_and_coarse_grain,
process_rna_file,
)

from haddock import log
from haddock.core.defaults import MODULE_DEFAULT_YAML
from haddock.core.typing import FilePath, Any
#from haddock.libs import libpdb
#from haddock.libs.libio import working_directory
from haddock.libs.libontology import Format, PDBFile
#from haddock.libs.libutil import check_subprocess
from haddock.modules import BaseHaddockModule

RECIPE_PATH = Path(__file__).resolve().parent
DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)

class HaddockModule(BaseHaddockModule):
"""ATTRACT module."""

name = RECIPE_PATH.name

def __init__(self,
order: int,
path: Path,
initial_params: FilePath = DEFAULT_CONFIG) -> None:
super().__init__(order, path, initial_params)

@classmethod
def confirm_installation(cls) -> None:
"""Confirm that ATTRACT and its environment variables are properly set."""
try:
attract_dir = os.environ['ATTRACTDIR']
attract_tools = os.environ['ATTRACTTOOLS']
nalib = os.environ['LIBRARY']
except KeyError as e:
raise EnvironmentError(f"Required environment variable not found: {e}")

attract_exec = Path(attract_dir, 'attract')
result = subprocess.run([str(attract_exec)], capture_output=True, text=True)

if "Too few arguments" not in result.stderr:
raise RuntimeError('ATTRACT is not installed properly')

# pass paths to _run for further use
cls.attract_dir = attract_dir
cls.attract_tools = attract_tools
cls.nalib = nalib

def _run(self) -> None:
"""Execute module.

# todo
1. check library +
2. process input +
3. make folder +
4. run docking < currently working on >
5. run lrmsd (eventually optional)
6. HIPPO (eventually optional)
7. Assembly ?
8. Scoring ?
9. SeleTopChains ? """

# Get the models generated in previous step
models: list[PDBFile] = [
p for p in self.previous_io.output
if p.file_type == Format.PDB ]

# Check that exactly two models are provided
# practically attract needs protein structure and RNA *sequence*
# but at this stage it's more practical to ask for RNA structure
if len(models) !=2 :
_msg = "ATTRACT requires exactly two molecules"
self.finish_with_error(_msg)

# Copy each model to the working directory
for model in models:
src_model = Path(model.path, model.file_name)
dest_model = Path(os.getcwd(), model.file_name)
shutil.copyfile(src_model, dest_model)

# Ensure we have exactly protein and RNA molecules
model_1 = models[0]
model_2 = models[1]

attracttools = self.attract_tools
attrac_reduce_path = Path(attracttools, 'reduce.py')

_, label_1 = rename_and_coarse_grain(model_1.file_name, attrac_reduce_path)
_, label_2 = rename_and_coarse_grain(model_2.file_name, attrac_reduce_path)

if {label_1, label_2} != {'protein', 'rna'}:
_msg = "ATTRACT requires protein and RNA molecules as input"
self.finish_with_error(_msg)

# Add required by ATTRACT files:
# - link fragment library
# - get motif.list, boundfrag.list, and fragXr.pdb
log.info("Preparing docking directory")

nalib = self.nalib
cmd = f"ln -s {nalib} nalib"
p = subprocess.run(shlex.split(cmd), capture_output=True)
err = p.stderr.decode('utf-8')

process_rna_file('rna-aar.pdb')

log.info("Running ATTRACT...")
cmd = [
'bash', '/trinity/login/arha/tools/scripts/attract/docking/dock-lrmsd-in-haddock.sh',
'.',
'10',
'/trinity/login/arha/dev-h3/test-attr/tmp'
]

docking = subprocess.run(cmd, capture_output=True, text=True)

with open('log.txt','w') as f:
for item in docking.stdout:
f.write(f"{item}")

with open('err.txt', 'w') as f:
for item in docking.stderr:
f.write(f"{item}")

list_of_created_models = []
created_models = ['protein-aa.pdb','rna-aa.pdb']
for model in created_models:
pdb_object = PDBFile(Path(model).name, path=".")
list_of_created_models.append(pdb_object)

self.output_models = list_of_created_models
self.export_io_models()


131 changes: 131 additions & 0 deletions src/haddock/modules/sampling/attract/attractmodule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
"""Set if functions related to [attract]"""

import os
import sys
import subprocess
import shutil
import shlex
from pathlib import Path


def rename_and_coarse_grain(pdb_file: str, reduce_path: str, lines_to_check: int = 10):
"""
Check several last lines of pdb file to label it as RNA, DNA or protein;
Then reduce it to ATTRACT coarse grained representation.

Args:
pdb_file (str): Path to the PDB file.
lines_to_check (int): Number of lines to check from the end of the file.
reduce_path (str); Path to $ATTRACTTOOLD/reduce.py

Returns:
subprocess.CompletedProcess: Result of the subprocess run.
(ATTRACT CG atom mapping)
molecule_type (str): Label of the input file
"""
dna_residues = {'DA', 'DG', 'DC', 'DT'}
rna_residues = {'A', 'U', 'G', 'C', 'RA', 'RU', 'RG', 'RC'}

old_path = Path(pdb_file)

with old_path.open('r') as f:
lines = f.readlines()[-lines_to_check:]

for line in lines:
if line.startswith("ATOM"):
residue_name = line[17:20].strip()
if residue_name in dna_residues:
molecule_type = 'dna'
elif residue_name in rna_residues:
molecule_type = 'rna'
else:
molecule_type = 'protein'

new_path = old_path.with_name(f'{molecule_type}-aa.pdb')
old_path.replace(new_path)

cmd = [sys.executable, reduce_path]
if molecule_type in ('dna', 'rna'):
cmd.append(f'--{molecule_type}')
cmd.append(str(new_path))

return subprocess.run(cmd, capture_output=True, text=True), molecule_type

def process_rna_file(pdb_file: str = 'rna-aar.pdb'):
"""
Process PDB file to generate RNA fragment and motif lists, and extract individual fragments.
Save generated files.

Args:
pdb_file (str): Path to reduced RNA PDB file.

Note:
This script works on all-atom pdb as well
"""
enumer_nucl = []
sequence = ''
fragments = []
residue_buffer = []
current_residue = []
last_residue_id = None

with open(pdb_file, 'r') as f:
for line in f:
if line.startswith("ATOM"):
residue_name = line[17:20].strip()[-1]
residue_id = int(line[22:26].strip())

# Get sequence
if [residue_name, residue_id] not in enumer_nucl:
enumer_nucl.append([residue_name, residue_id])
sequence += residue_name

# Get fragments
if last_residue_id is None or residue_id == last_residue_id:
current_residue.append(line)
else:
if current_residue:
residue_buffer.append(current_residue)
if len(residue_buffer) == 3:
fragments.append(residue_buffer[:])
residue_buffer.pop(0)
current_residue = [line]
last_residue_id = residue_id

# Add the last residue if any
if current_residue:
residue_buffer.append(current_residue)
if len(residue_buffer) == 3:
fragments.append(residue_buffer[:])

# Generate motif and boundfrag lists
prev_frag = sequence[:3]
count = 1
boundfrag_list = [[count, prev_frag]]
motif_list = [prev_frag]

for x in sequence[3:]:
count += 1
next_frag = prev_frag[1:] + x
prev_frag = next_frag
boundfrag_list.append([count, next_frag])
if prev_frag not in motif_list:
motif_list.append(next_frag)

# Save boundfrag_list
with open('boundfrag.list', 'w') as f:
for item in boundfrag_list:
f.write(f"{item[0]} {item[1]}\n")
# Save motif_list
with open('motif.list', 'w') as f:
for item in motif_list:
f.write(f"{item}\n")
# Save each fragments
for i, fragment in enumerate(fragments, start=1):
with open(f"frag{i}r.pdb", 'w') as frag_file:
for nucleotide in fragment:
for atomline in nucleotide:
if not atomline.endswith('\n'):
atomline += '\n'
frag_file.write(atomline)
return
Loading
Loading