haddocking · AnnaKravchenko · Sep 18, 2024 · Sep 23, 2024 · Sep 24, 2024 · Sep 24, 2024
diff --git a/src/haddock/modules/_template_cat/README.md b/src/haddock/modules/_template_cat/README.md
@@ -4,25 +4,25 @@ To develop your own HADDOCK3 module follow the patterns used in other modules.
 
 1. If your module belongs to a new category, create a folder for that category
    under the `modules` folder.
-1. Else, create a folder for your module inside its relevant category.
+2. Else, create a folder for your module inside its relevant category.
    The name of that folder is the name of the module, i.e., the name used to call
    the module in the haddock3 configuration files and used throughout the code base.
-1. Copy the `__init__.py` file here to the new module's folder and edit it accordingly
+3. Copy the `__init__.py` file here to the new module's folder and edit it accordingly
    to the instructions there.
-1. Do the same for the `defaults.yaml` file.
-1. You can then add any extra files needed inside your module's folder in order to
+4. Do the same for the `defaults.yaml` file.
+5. You can then add any extra files needed inside your module's folder in order to
    develop your module fully.
-1. If your module requires any extra libraries, describe how to install those libraries
+6. If your module requires any extra libraries, describe how to install those libraries
    in the `docs/INSTALL.md` file. Unless approved by the Haddock Team, do not add
    those dependencies to the `requirements.*` files.
-1. HADDOCK3 has already many features related to IO, subprocess run, sending jobs,
+7. HADDOCK3 has already many features related to IO, subprocess run, sending jobs,
    etc. Please, look around the `libs` folder for pertinent functions, but, above all,
    feel welcomed to reach out to us with any doubts.
-1. Please write also tests for your module. Our testing machinery already
+8. Please write also tests for your module. Our testing machinery already
    tests for the common patterns, for example, inspecting the `defaults.yaml` file.
    But you should write any additional tests to ensure that your module works properly.
    See other examples in the `tests/` folder.
-1. Finally, add an example of how to use your module in the `examples/` folder.
+9. Finally, add an example of how to use your module in the `examples/` folder.
    The example should have a short sampling scheme. Name the config file ending with
    `-test.cfg`.
 

diff --git a/src/haddock/modules/_template_cat/_template_mod/__init__.py b/src/haddock/modules/_template_cat/_template_mod/__init__.py
@@ -60,7 +60,7 @@ def __init__(
             ) -> None:
 
         # if your module uses CNS you might need to define where the main CNS
-        # script is localted. See examples in `topoaa`, `emref`.
+        # script is located. See examples in `topoaa`, `emref`.
         #  else leave it out.
         # cns_script = Path(RECIPE_PATH, "cns", "main.cns")
 

diff --git a/src/haddock/modules/sampling/attract/__init__.py b/src/haddock/modules/sampling/attract/__init__.py
@@ -0,0 +1,154 @@
+"""attract docking module
+==================================
+
+This module performs a fragment-based single-stranded (ss) RNA-protein docking 
+using ATTRACT docking engine. This docking approach was developed to tackle the 
+flexibility of ssRNA. Its core idea is to split ssRNA chain into overlapping 
+trinucleotides (fragments), and dock them onto the rigid receptor separately, 
+assembling the fragments back into the whole chain models afterwards.
+
+#todo 
+add short description of the protocol, including CG, NAlib, sampling, 
+scoring (two ways) and assembly + possible restraints. 
+."""
+import os
+import sys
+import subprocess
+import shutil
+import shlex
+from pathlib import Path
+from haddock.modules.sampling.attract.attractmodule import (
+    rename_and_coarse_grain,
+    process_rna_file,
+)
+
+from haddock import log
+from haddock.core.defaults import MODULE_DEFAULT_YAML
+from haddock.core.typing import FilePath, Any 
+#from haddock.libs import libpdb
+#from haddock.libs.libio import working_directory
+from haddock.libs.libontology import Format, PDBFile
+#from haddock.libs.libutil import check_subprocess
+from haddock.modules import BaseHaddockModule
+
+RECIPE_PATH = Path(__file__).resolve().parent
+DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)
+
+class HaddockModule(BaseHaddockModule):
+    """ATTRACT module."""
+
+    name = RECIPE_PATH.name
+
+    def __init__(self,
+                 order: int,
+                 path: Path,
+                 initial_params: FilePath = DEFAULT_CONFIG) -> None:
+        super().__init__(order, path, initial_params)
+
+    @classmethod
+    def confirm_installation(cls) -> None:
+        """Confirm that ATTRACT and its environment variables are properly set."""
+        try:
+            attract_dir = os.environ['ATTRACTDIR']
+            attract_tools = os.environ['ATTRACTTOOLS']
+            nalib = os.environ['LIBRARY']
+        except KeyError as e:
+            raise EnvironmentError(f"Required environment variable not found: {e}")
+
+        attract_exec = Path(attract_dir, 'attract')
+        result = subprocess.run([str(attract_exec)], capture_output=True, text=True)
+
+        if "Too few arguments" not in result.stderr:
+            raise RuntimeError('ATTRACT is not installed properly')
+
+        # pass paths to _run for further use
+        cls.attract_dir = attract_dir
+        cls.attract_tools = attract_tools
+        cls.nalib = nalib
+
+    def _run(self) -> None:
+        """Execute module.
+
+            # todo 
+            1. check library + 
+            2. process input +
+            3. make folder +
+            4. run docking < currently working on > 
+            5. run lrmsd (eventually optional)
+            6. HIPPO (eventually optional)
+            7. Assembly ?
+            8. Scoring ?
+            9. SeleTopChains ? """
+
+        # Get the models generated in previous step
+        models: list[PDBFile] = [
+            p for p in self.previous_io.output
+            if p.file_type == Format.PDB ]
+
+        # Check that exactly two models are provided
+        # practically attract needs protein structure and RNA *sequence* 
+        # but at this stage it's more practical to ask for RNA structure     
+        if len(models) !=2 : 
+            _msg = "ATTRACT requires exactly two molecules"
+            self.finish_with_error(_msg)
+
+        # Copy each model to the working directory
+        for model in models:
+            src_model = Path(model.path, model.file_name)
+            dest_model = Path(os.getcwd(), model.file_name)
+            shutil.copyfile(src_model, dest_model)
+
+        # Ensure we have exactly protein and RNA molecules
+        model_1 = models[0]
+        model_2 = models[1]
+
+        attracttools = self.attract_tools
+        attrac_reduce_path = Path(attracttools, 'reduce.py')
+
+        _, label_1 = rename_and_coarse_grain(model_1.file_name, attrac_reduce_path)
+        _, label_2 = rename_and_coarse_grain(model_2.file_name, attrac_reduce_path)
+
+        if {label_1, label_2} != {'protein', 'rna'}:
+            _msg = "ATTRACT requires protein and RNA molecules as input"
+            self.finish_with_error(_msg)
+
+        # Add required by ATTRACT files: 
+        # - link fragment library          
+        # - get motif.list, boundfrag.list, and fragXr.pdb 
+        log.info("Preparing docking directory")
+
+        nalib = self.nalib
+        cmd = f"ln -s {nalib} nalib"
+        p = subprocess.run(shlex.split(cmd), capture_output=True)
+        err = p.stderr.decode('utf-8')
+
+        process_rna_file('rna-aar.pdb')
+
+        log.info("Running ATTRACT...")
+        cmd = [
+        'bash', '/trinity/login/arha/tools/scripts/attract/docking/dock-lrmsd-in-haddock.sh',
+        '.',
+        '10',
+        '/trinity/login/arha/dev-h3/test-attr/tmp'
+        ]
+
+        docking = subprocess.run(cmd, capture_output=True, text=True)
+
+        with open('log.txt','w') as f:
+            for item in docking.stdout:
+                f.write(f"{item}")
+
+        with open('err.txt', 'w') as f:
+                    for item in docking.stderr:
+                        f.write(f"{item}")
+
+        list_of_created_models = []     
+        created_models = ['protein-aa.pdb','rna-aa.pdb']
+        for model in created_models:
+            pdb_object = PDBFile(Path(model).name,  path=".")
+            list_of_created_models.append(pdb_object)
+
+        self.output_models = list_of_created_models
+        self.export_io_models()
+
+
diff --git a/src/haddock/modules/sampling/attract/attractmodule.py b/src/haddock/modules/sampling/attract/attractmodule.py
@@ -0,0 +1,131 @@
+"""Set if functions related to [attract]"""
+
+import os
+import sys 
+import subprocess
+import shutil
+import shlex
+from pathlib import Path
+
+
+def rename_and_coarse_grain(pdb_file: str, reduce_path: str, lines_to_check: int = 10):
+    """
+    Check several last lines of pdb file to label it as RNA, DNA or protein; 
+    Then reduce it to ATTRACT coarse grained representation.
+
+    Args:
+    pdb_file (str): Path to the PDB file.
+    lines_to_check (int): Number of lines to check from the end of the file.
+    reduce_path (str); Path to $ATTRACTTOOLD/reduce.py 
+
+    Returns:
+    subprocess.CompletedProcess: Result of the subprocess run. 
+    (ATTRACT CG atom mapping)
+    molecule_type (str): Label of the input file
+    """
+    dna_residues = {'DA', 'DG', 'DC', 'DT'}
+    rna_residues = {'A', 'U', 'G', 'C', 'RA', 'RU', 'RG', 'RC'}
+
+    old_path = Path(pdb_file)
+
+    with old_path.open('r') as f:
+        lines = f.readlines()[-lines_to_check:]
+
+    for line in lines:
+        if line.startswith("ATOM"):
+            residue_name = line[17:20].strip()
+            if residue_name in dna_residues:
+                molecule_type = 'dna'
+            elif residue_name in rna_residues:
+                molecule_type = 'rna'
+            else:
+                molecule_type = 'protein'
+
+        new_path = old_path.with_name(f'{molecule_type}-aa.pdb')
+        old_path.replace(new_path)
+
+        cmd = [sys.executable, reduce_path]
+        if molecule_type in ('dna', 'rna'):
+            cmd.append(f'--{molecule_type}')
+        cmd.append(str(new_path))
+
+        return subprocess.run(cmd, capture_output=True, text=True), molecule_type
+
+def process_rna_file(pdb_file: str = 'rna-aar.pdb'):
+    """
+    Process PDB file to generate RNA fragment and motif lists, and extract individual fragments.
+    Save generated files. 
+
+    Args:
+    pdb_file (str): Path to reduced RNA PDB file.    
+
+    Note: 
+    This script works on all-atom pdb as well
+    """
+    enumer_nucl = []
+    sequence = ''
+    fragments = []
+    residue_buffer = []  
+    current_residue = [] 
+    last_residue_id = None
+
+    with open(pdb_file, 'r') as f:
+        for line in f:
+            if line.startswith("ATOM"):
+                residue_name = line[17:20].strip()[-1]    
+                residue_id = int(line[22:26].strip())
+
+                # Get sequence
+                if [residue_name, residue_id] not in enumer_nucl:
+                    enumer_nucl.append([residue_name, residue_id])
+                    sequence += residue_name
+
+                # Get fragments
+                if last_residue_id is None or residue_id == last_residue_id:
+                    current_residue.append(line)
+                else:
+                    if current_residue:
+                        residue_buffer.append(current_residue)
+                        if len(residue_buffer) == 3:
+                            fragments.append(residue_buffer[:])
+                            residue_buffer.pop(0) 
+                    current_residue = [line]
+                last_residue_id = residue_id 
+
+        # Add the last residue if any
+        if current_residue:
+            residue_buffer.append(current_residue)
+            if len(residue_buffer) == 3:
+                fragments.append(residue_buffer[:])
+
+    # Generate motif and boundfrag lists
+    prev_frag = sequence[:3]
+    count = 1
+    boundfrag_list = [[count, prev_frag]]
+    motif_list = [prev_frag]
+
+    for x in sequence[3:]:
+        count += 1
+        next_frag = prev_frag[1:] + x
+        prev_frag = next_frag
+        boundfrag_list.append([count, next_frag])
+        if prev_frag not in motif_list: 
+            motif_list.append(next_frag)
+
+    # Save boundfrag_list
+    with open('boundfrag.list', 'w') as f:
+        for item in boundfrag_list:
+            f.write(f"{item[0]} {item[1]}\n")
+    # Save motif_list
+    with open('motif.list', 'w') as f:
+        for item in motif_list:
+            f.write(f"{item}\n")
+    # Save each fragments
+    for i, fragment in enumerate(fragments, start=1):
+        with open(f"frag{i}r.pdb", 'w') as frag_file:
+            for nucleotide in fragment:
+                for atomline in nucleotide:
+                    if not atomline.endswith('\n'):
+                        atomline += '\n'
+                    frag_file.write(atomline)
+    return