Merge pull request #11 from luponzo86/master

update to v.0.9.7
prody · Dec 13, 2019 · de60e62 · de60e62
2 parents 219aad5 + 95cb130
commit de60e62
Show file tree

Hide file tree

Showing 32 changed files with 6,703 additions and 167 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -38,6 +38,7 @@ before_install:
   # install dependencies for rhapsody
   - conda install requests numpy scikit-learn
   - conda install matplotlib
+  - pip install tqdm
 
 install:
   # - python setup.py install
@@ -46,4 +47,5 @@ install:
 script:
   # - pytest test1.py
   - cd test/test-1; python test-1.py
+  - cd ../test-2; python test-2.py
 
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,2 @@
+include rhapsody/VERSION
+include rhapsody/data.tar.gz
diff --git a/VERSION b/VERSION
diff --git a/docs/README b/docs/README
@@ -0,0 +1,26 @@
+Example of a documentation string:
+
+function(arg, kwarg=None):
+""" General description. Please use these syntax rules:
+
+- :class:`~numpy.ndarray`
+- :meth:`setSomething` or :meth:`.setSomething`
+- :func:`someFunction` or :func:`.someFunction`
+- **None**
+You can insert refs too ([ref]_).
+
+:arg arg: detailed description.
+  This is how you make a bullet list in an argument's description:
+
+  - if *arg* = **None** (default), blah blah
+  - if *arg* = ``'something else'``, blah blah
+:type arg: str, list, bool, whatever
+:arg kwarg: blah blah
+:type kwarg: str
+:return: something
+:rtype: str
+
+.. [ref] Ponzoni L, Bahar I. Structural dynamics is a determinant of
+  the functional significance of missense variants. *PNAS* **2018**
+  115 (16) 4164-4169.
+"""
diff --git a/docs/conf.py b/docs/conf.py
@@ -42,7 +42,8 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', '_backup', 'Thumbs.db', '.DS_Store', '**tar.gz**']
+exclude_patterns = ['_build', '_backup', 'Thumbs.db', '.DS_Store',
+                    '**tar.gz**']
 
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = 'sphinx'
@@ -60,6 +61,17 @@
 # autosummary_generate = True
 
 
+# functions for including __init__ documentation
+def skip(app, what, name, obj, would_skip, options):
+    if name == "__init__":
+        return False
+    return would_skip
+
+
+def setup(app):
+    app.connect("autodoc-skip-member", skip)
+
+
 # -- Options for HTML output -------------------------------------------------
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for

diff --git a/docs/rhapsody.utils.rst b/docs/rhapsody.utils.rst
@@ -9,6 +9,14 @@ rhapsody.utils package
 Submodules
 ----------
 
+rhapsody.utils.misc module
+--------------------------
+
+.. automodule:: rhapsody.utils.misc
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 rhapsody.utils.settings module
 ------------------------------
 

diff --git a/rhapsody/VERSION b/rhapsody/VERSION
@@ -0,0 +1 @@
+0.9.7
diff --git a/rhapsody/__init__.py b/rhapsody/__init__.py
@@ -6,11 +6,17 @@
 from os import path
 
 here = path.abspath(path.dirname(__file__))
-with open(path.join(here, '../VERSION'), encoding='utf-8') as f:
+with open(path.join(here, 'VERSION'), encoding='utf-8') as f:
     __version__ = f.read()
 
 __release__ = __version__
 
+__author__ = "Luca Ponzoni"
+__date__ = "December 2019"
+__maintainer__ = "Luca Ponzoni"
+__email__ = "[email protected]"
+__status__ = "Production"
+
 __all__ = []
 
 from . import utils

diff --git a/rhapsody/features/BLOSUM.py b/rhapsody/features/BLOSUM.py
@@ -5,6 +5,12 @@
 import numpy as np
 from Bio.SubsMat.MatrixInfo import blosum62
 
+__author__ = "Luca Ponzoni"
+__date__ = "December 2019"
+__maintainer__ = "Luca Ponzoni"
+__email__ = "[email protected]"
+__status__ = "Production"
+
 __all__ = ['BLOSUM_FEATS', 'calcBLOSUMfeatures']
 
 BLOSUM_FEATS = ['BLOSUM']

diff --git a/rhapsody/features/EVmutation.py b/rhapsody/features/EVmutation.py
@@ -7,12 +7,11 @@
 from os.path import splitext, join, basename
 from prody import SETTINGS, LOGGER
 
-# extract precomputed EVmutation scores for given mutants
-# NB:
-# negative DeltaE_epist --> deleterious effect
-# DeltaE_epist == 0     --> neutral effect (wild-type)
-# positive DeltaE_epist --> neutral/benign effect
-
+__author__ = "Luca Ponzoni"
+__date__ = "December 2019"
+__maintainer__ = "Luca Ponzoni"
+__email__ = "[email protected]"
+__status__ = "Production"
 
 __all__ = ['EVMUT_FEATS', 'recoverEVmutFeatures']
 
@@ -35,6 +34,12 @@ def recoverEVmutFeatures(SAVs):
     LOGGER.timeit('_EVmut')
     LOGGER.info('Recovering EVmutation data...')
 
+    # extracts precomputed EVmutation scores for given mutants
+    # NB:
+    # negative DeltaE_epist --> deleterious effect
+    # DeltaE_epist == 0     --> neutral effect (wild-type)
+    # positive DeltaE_epist --> neutral/benign effect
+
     def find_matching_files(file_list, acc, pos):
         match_files = []
         for fname in [f for f in file_list if f.startswith(acc)]:
@@ -70,8 +75,8 @@ def find_matching_files(file_list, acc, pos):
             with open(join(EVmut_dir, fname), 'r') as f:
                 for line in f:
                     if line.startswith(mutant):
-                        l = line.strip().split(';')[4:8]
-                        data.append(l)
+                        ll = line.strip().split(';')[4:8]
+                        data.append(ll)
                         break
         data = np.array(data, dtype=float)
         if len(data) == 0:
@@ -82,3 +87,10 @@ def find_matching_files(file_list, acc, pos):
 
     LOGGER.report('EVmutation scores recovered in %.1fs.', '_EVmut')
     return features
+
+
+def calcEVmutPathClasses(EVmut_score):
+    c = -SETTINGS.get('EVmutation_metrics')['optimal cutoff']
+    EVmut_class = np.where(EVmut_score < c, 'deleterious', 'neutral')
+    EVmut_class[np.isnan(EVmut_score)] = '?'
+    return EVmut_class
diff --git a/rhapsody/features/PDB.py b/rhapsody/features/PDB.py
@@ -3,16 +3,23 @@
 PDB-based structural and dynamical features in a single place, and a
 function for using the latter on a list of PDB SAV coordinates."""
 
+import numpy as np
+import pickle
+import datetime
+import os
+from tqdm import tqdm
 from prody import Atomic, parsePDB, writePDB, LOGGER, SETTINGS
 from prody import GNM, ANM, calcSqFlucts
 from prody import calcPerturbResponse, calcMechStiff
 # from prody import calcMBS
 from prody import reduceModel, sliceModel
 from prody import execDSSP, parseDSSP
-import numpy as np
-import pickle
-import datetime
-import os
+
+__author__ = "Luca Ponzoni"
+__date__ = "December 2019"
+__maintainer__ = "Luca Ponzoni"
+__email__ = "[email protected]"
+__status__ = "Production"
 
 __all__ = ['STR_FEATS', 'DYN_FEATS', 'PDB_FEATS',
            'PDBfeatures', 'calcPDBfeatures']
@@ -209,6 +216,9 @@ def savePickle(self, folder=None, filename=None):
         LOGGER.info("Pickle '{}' saved.".format(filename))
         return pickle_path
 
+    def resetTimestamp(self):
+        self.timestamp = str(datetime.datetime.utcnow())
+
     def setNumModes(self, n_modes):
         """Sets the number of ENM modes to be computed. If different from
         the number provided at instantiation, any precomputed features will
@@ -643,7 +653,7 @@ def calcSelFeatures(self, chain='all', resid=None, sel_feats=None):
 
 
 def calcPDBfeatures(mapped_SAVs, sel_feats=None, custom_PDB=None,
-                    refresh=False):
+                    refresh=False, status_file=None, status_prefix=None):
     LOGGER.info('Computing structural and dynamical features '
                 'from PDB structures...')
     LOGGER.timeit('_calcPDBFeats')
@@ -662,24 +672,40 @@ def calcPDBfeatures(mapped_SAVs, sel_feats=None, custom_PDB=None,
     else:
         # no need to sort when using a custom PDB or PDBID
         sorting_map = range(num_SAVs)
+    # define how to report progress
+    if status_prefix is None:
+        status_prefix = ''
+    bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]'
+    if status_file is not None:
+        status_file = open(status_file, 'w')
+        progress_bar = tqdm(
+            [(i, mapped_SAVs[i]) for i in sorting_map], file=status_file,
+            bar_format=bar_format+'\n')
+    else:
+        progress_bar = tqdm(
+            [(i, mapped_SAVs[i]) for i in sorting_map], bar_format=bar_format)
     cache = {'PDBID': None, 'chain': None, 'obj': None}
     count = 0
-    for indx, SAV in [(i, mapped_SAVs[i]) for i in sorting_map]:
+    for indx, SAV in progress_bar:
         count += 1
         if SAV['PDB size'] == 0:
             # SAV could not be mapped to PDB
             _features = np.nan
             SAV_coords = SAV['SAV coords']
-            LOGGER.info(f"[{count}/{num_SAVs}] SAV '{SAV_coords}' "
-                        "couldn't be mapped to PDB")
+            progress_msg = f"{status_prefix}No PDB for SAV '{SAV_coords}'"
         else:
             parsed_PDB_coords = SAV['PDB SAV coords'].split()
             PDBID, chID = parsed_PDB_coords[:2]
             resid = int(parsed_PDB_coords[2])
-            LOGGER.info("[{}/{}] Analizing mutation site {}:{} {}..."
-                        .format(count, num_SAVs, PDBID, chID, resid))
+            progress_msg = status_prefix + \
+                f'Analizing mutation site {PDBID}:{chID} {resid}'
             # chID == "?" stands for "empty space"
             chID = " " if chID == "?" else chID
+        # report progress
+        # LOGGER.info(f"[{count}/{num_SAVs}] {progress_msg}...")
+        progress_bar.set_description(progress_msg)
+        # compute PDB features, if possible
+        if SAV['PDB size'] != 0:
             if PDBID == cache['PDBID']:
                 # use PDBfeatures instance from previous iteration
                 obj = cache['obj']
@@ -725,4 +751,6 @@ def calcPDBfeatures(mapped_SAVs, sel_feats=None, custom_PDB=None,
            and custom_PDB is None:
             cache['obj'].savePickle()
     LOGGER.report('PDB features have been computed in %.1fs.', '_calcPDBFeats')
+    if status_file:
+        os.remove(status_file.name)
     return features
diff --git a/rhapsody/features/Pfam.py b/rhapsody/features/Pfam.py
@@ -3,10 +3,18 @@
 coevolution properties of an amino acid substitution from a Pfam
 multiple sequence alignment."""
 
+import os
 import numpy as np
+from tqdm import tqdm
 from prody import LOGGER
 from .Uniprot import UniprotMapping
 
+__author__ = "Luca Ponzoni"
+__date__ = "December 2019"
+__maintainer__ = "Luca Ponzoni"
+__email__ = "[email protected]"
+__status__ = "Production"
+
 __all__ = ['PFAM_FEATS', 'calcPfamFeatures']
 
 PFAM_FEATS = ['entropy', 'ranked_MI']
@@ -38,7 +46,7 @@ def calcNormRank(array, i):
         return feats
 
 
-def calcPfamFeatures(SAVs):
+def calcPfamFeatures(SAVs, status_file=None, status_prefix=None):
     LOGGER.info('Computing sequence properties from Pfam domains...')
     LOGGER.timeit('_calcPfamFeats')
     # sort SAVs, so to group together those
@@ -49,14 +57,29 @@ def calcPfamFeatures(SAVs):
     num_SAVs = len(SAVs)
     feat_dtype = np.dtype([('entropy', 'f'), ('ranked_MI', 'f')])
     features = np.zeros(num_SAVs, dtype=feat_dtype)
+    # define how to report progress
+    if status_prefix is None:
+        status_prefix = ''
+    bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]'
+    if status_file is not None:
+        status_file = open(status_file, 'w')
+        progress_bar = tqdm(
+            [(i, SAVs[i]) for i in sorting_map], file=status_file,
+            bar_format=bar_format+'\n')
+    else:
+        progress_bar = tqdm(
+            [(i, SAVs[i]) for i in sorting_map], bar_format=bar_format)
     # map to Pfam domains using UniprotMapping class
     cache = {'acc': None, 'obj': None, 'warn': ''}
     count = 0
-    for indx, SAV in [(i, SAVs[i]) for i in sorting_map]:
+    for indx, SAV in progress_bar:
         count += 1
         acc, pos, aa1, aa2 = SAV.split()
         pos = int(pos)
-        LOGGER.info(f"[{count}/{num_SAVs}] Mapping SAV '{SAV}' to Pfam...")
+        # report progress
+        progress_msg = f"{status_prefix}Mapping SAV '{SAV}' to Pfam"
+        # LOGGER.info(f"[{count}/{num_SAVs}] {progress_msg}...")
+        progress_bar.set_description(progress_msg)
         # map to Pfam domains using 'UniprotMapping' class
         if acc == cache['acc']:
             # use object from previous iteration
@@ -102,4 +125,6 @@ def calcPfamFeatures(SAVs):
             cache['obj'].savePickle()
     LOGGER.report('SAVs have been mapped on Pfam domains and sequence '
                   'properties have been computed in %.1fs.', '_calcPfamFeats')
+    if status_file:
+        os.remove(status_file.name)
     return features
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		include rhapsody/VERSION
		include rhapsody/data.tar.gz