galaxycomputationalchemistry · bgruening · Jun 6, 2024 · May 23, 2024 · Jun 5, 2024 · Jun 5, 2024
diff --git a/tools/buildtools/protein_structure_rebuilders/BioPDB/.shed.yml b/tools/buildtools/protein_structure_rebuilders/BioPDB/.shed.yml
@@ -0,0 +1,15 @@
+name: biopdb
+owner: chemteam
+description: "Biopython is a collection of modules for dealing with biological data in Python."
+homepage_url: https://biopython.org/
+long_description: |
+   Biopython (https://biopython.org/) is a collection of modules for dealing with biological data in Python. This particular set of tools uses the Bio.PDB package to analyze and compare protein structures.
+remote_repository_url: https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/
+type: unrestricted
+categories:
+  - Computational chemistry
+auto_tool_repositories:
+  name_template: "{{ tool_id }}"
+  description_template: "Wrapper for Biopython's Bio.PDB package: {{ tool_name }}"
+maintainers:
+  - thepineapplepirate
diff --git a/tools/buildtools/protein_structure_rebuilders/BioPDB/BioPDB_align_and_rmsd.py b/tools/buildtools/protein_structure_rebuilders/BioPDB/BioPDB_align_and_rmsd.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+
+# The MIT License
+#
+# Copyright (c) 2010-2016 Anders S. Christensen
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+import argparse
+
+import Bio.PDB
+
+
+def __main__():
+    parser = argparse.ArgumentParser(
+        description='Residues to be aligned')
+    parser.add_argument(
+                        '--start_residue', default=None,
+                        help='start residue')
+    parser.add_argument(
+                        '--end_residue', default=None,
+                        help='end residue')
+    parser.add_argument(
+                        '--ref_structure', default=None,
+                        help='reference structure')
+    parser.add_argument(
+                        '--model', default=None,
+                        help='model structure')
+    parser.add_argument(
+                        '--aligned_structure', default=None,
+                        help='aligned structure')
+    parser.add_argument(
+                        '--rmsd', default=None,
+                        help='rmsd')
+    args = parser.parse_args()
+
+    # Select what residues numbers you wish to align
+    # and put them in a list
+    start_id = int(args.start_residue)
+    end_id = int(args.end_residue)
+    atoms_to_be_aligned = range(start_id, end_id + 1)
+
+    # Start the parser
+    pdb_parser = Bio.PDB.PDBParser(QUIET=True)
+
+    # Get the structures
+    ref_structure = pdb_parser.get_structure("reference", args.ref_structure)
+    sample_structure = pdb_parser.get_structure("sample", args.model)
+
+    # Use the first model in the pdb-files for alignment
+    # Change the number 0 if you want to align to another structure
+    ref_model = ref_structure[0]
+    sample_model = sample_structure[0]
+
+    # Make a list of the atoms (in the structures) you wish to align.
+    # In this case we use CA atoms whose index is in the specified range
+    ref_atoms = []
+    sample_atoms = []
+
+    # Iterate of all chains in the model in order to find all residues
+    for ref_chain in ref_model:
+        # Iterate of all residues in each model in order to find proper atoms
+        for ref_res in ref_chain:
+            # Check if residue number ( .get_id() ) is in the list
+            if ref_res.get_id()[1] in atoms_to_be_aligned:
+                # Append CA atom to list
+                ref_atoms.append(ref_res['CA'])
+
+    # Do the same for the sample structure
+    for sample_chain in sample_model:
+        for sample_res in sample_chain:
+            if sample_res.get_id()[1] in atoms_to_be_aligned:
+                sample_atoms.append(sample_res['CA'])
+
+    # Now we initiate the superimposer:
+    super_imposer = Bio.PDB.Superimposer()
+    super_imposer.set_atoms(ref_atoms, sample_atoms)
+    super_imposer.apply(sample_model.get_atoms())
+
+    # Save RMSD into an output file:
+    with open(args.rmsd, 'w') as rmsd_out:
+        rmsd_out.write(str(super_imposer.rms))
+
+    # Save aligned coordinates of the model:
+    io = Bio.PDB.PDBIO()
+    io.set_structure(sample_structure)
+    io.save(args.aligned_structure)
+
+
+if __name__ == "__main__":
+    __main__()
diff --git a/tools/buildtools/protein_structure_rebuilders/BioPDB/BioPDB_align_and_rmsd.xml b/tools/buildtools/protein_structure_rebuilders/BioPDB/BioPDB_align_and_rmsd.xml
@@ -0,0 +1,69 @@
+<tool id="biopdb_align_and_rmsd" name="Align structures and compute relative RMSDs" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">
+    <description>using Biopython</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.79</token>
+        <token name="@GALAXY_VERSION@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="1.79">biopython</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+
+    python3 '$__tool_directory__/BioPDB_align_and_rmsd.py' --start_residue '$start_residue' --end_residue '$end_residue' --ref_structure '$ref_structure' --model '$model' --aligned_structure '$aligned_structure' --rmsd '$rmsd_out' >> verbose.txt 2>&1
+
+    ]]></command>
+    <inputs>
+        <param name="start_residue" type="integer" label="Starting residue to align" help="This should be the same starting residue for the model and reference structure." value="0" />
+        <param name="end_residue" type="integer" label="Ending residue to align" help="This should be the same ending residue for the model and reference structure." value="0"/>
+        <param name="ref_structure" type="data" format="pdb" label="Reference structure" help="This can be an experimental structure or another model you wish to compare against."/>
+        <param name="model" type="data" format="pdb" label="Sample structure" help="This is the structure you wish to align and compute a relative RMSD for. This can also be an experimental structure or another model."/>
+    </inputs>
+
+    <outputs>
+        <data name="aligned_structure" format="pdb" label="PDB file of the aligned structure ${on_string}"></data>
+        <data name="rmsd_out" format="tabular" label="RMSD of the aligned structure ${on_string}"></data>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="start_residue" value="1" />
+            <param name="end_residue" value="15"/>
+            <param name="ref_structure" value="reference_structure.pdb" />
+            <param name="model" value="model.pdb" />
+            <output name="aligned_structure" ftype="pdb">
+                <assert_contents>
+                    <has_text text="ATOM      5  CA  ARG     1       6.016   6.125  -0.066  1.00  0.00           C"/>
+                    <has_text text="ATOM     29  CA  HIS     2       4.909   5.168   3.434  1.00  0.00           C"/>
+                    <has_text text="ATOM     46  CA  TYR     3       7.956   4.326   5.551  1.00  0.00           C"/>
+                </assert_contents>
+            </output>
+            <output name="rmsd_out" ftype="tabular">
+                <assert_contents>
+                    <has_text text="5.492787964992471"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+        <help><![CDATA[
+Tool to align protein structures and compute relative RMSDs, using the alpha carbon coordinates.
+
+.. class:: infomark
+
+**Inputs**
+
+PDB files for the reference structure, a model, as well as the starting and ending residues for the alignment.
+
+.. class:: infomark
+
+**Outputs**
+
+1) Tabular file containing the RMSD.
+2) PDB file of the model with the aligned coordinates.
+
+
+        ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btp163</citation>
+    </citations>
+</tool>