forked from luponzo86/rhapsody
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11 from luponzo86/master
update to v.0.9.7
- Loading branch information
Showing
32 changed files
with
6,703 additions
and
167 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
include rhapsody/VERSION | ||
include rhapsody/data.tar.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
Example of documentation string: | ||
|
||
function(arg, kwarg=None): | ||
""" General description. Please use these syntax rules: | ||
|
||
- :class:`~numpy.ndarray` | ||
- :meth:`setSomething` or :meth:`.setSomething` | ||
- :func:`someFunction` or :func:`.someFunction` | ||
- **None** | ||
You can insert refs too ([ref]_). | ||
|
||
:arg arg: detailed description. | ||
This is how you make a bullet list in an argument's description: | ||
|
||
- if *arg* = **None** (default), blah blah | ||
- if *arg* = ``'something else'``, blah blah | ||
:type arg: str, list, bool, whatever | ||
:arg kwarg: blah blah | ||
:type kwarg: str | ||
:return: something | ||
:rtype: str | ||
|
||
.. [ref] Ponzoni L, Bahar I. Structural dynamics is a determinant of | ||
the functional significance of missense variants. *PNAS* **2018** | ||
115 (16) 4164-4169. | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0.9.7 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,11 +6,17 @@ | |
from os import path | ||
|
||
here = path.abspath(path.dirname(__file__)) | ||
with open(path.join(here, '../VERSION'), encoding='utf-8') as f: | ||
with open(path.join(here, 'VERSION'), encoding='utf-8') as f: | ||
__version__ = f.read() | ||
|
||
__release__ = __version__ | ||
|
||
__author__ = "Luca Ponzoni" | ||
__date__ = "December 2019" | ||
__maintainer__ = "Luca Ponzoni" | ||
__email__ = "[email protected]" | ||
__status__ = "Production" | ||
|
||
__all__ = [] | ||
|
||
from . import utils | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,12 @@ | |
import numpy as np | ||
from Bio.SubsMat.MatrixInfo import blosum62 | ||
|
||
__author__ = "Luca Ponzoni" | ||
__date__ = "December 2019" | ||
__maintainer__ = "Luca Ponzoni" | ||
__email__ = "[email protected]" | ||
__status__ = "Production" | ||
|
||
__all__ = ['BLOSUM_FEATS', 'calcBLOSUMfeatures'] | ||
|
||
BLOSUM_FEATS = ['BLOSUM'] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,12 +7,11 @@ | |
from os.path import splitext, join, basename | ||
from prody import SETTINGS, LOGGER | ||
|
||
# extract precomputed EVmutation scores for given mutants | ||
# NB: | ||
# negative DeltaE_epist --> deleterious effect | ||
# DeltaE_epist == 0 --> neutral effect (wild-type) | ||
# positive DeltaE_epist --> neutral/benign effect | ||
|
||
__author__ = "Luca Ponzoni" | ||
__date__ = "December 2019" | ||
__maintainer__ = "Luca Ponzoni" | ||
__email__ = "[email protected]" | ||
__status__ = "Production" | ||
|
||
__all__ = ['EVMUT_FEATS', 'recoverEVmutFeatures'] | ||
|
||
|
@@ -35,6 +34,12 @@ def recoverEVmutFeatures(SAVs): | |
LOGGER.timeit('_EVmut') | ||
LOGGER.info('Recovering EVmutation data...') | ||
|
||
# extracts precomputed EVmutation scores for given mutants | ||
# NB: | ||
# negative DeltaE_epist --> deleterious effect | ||
# DeltaE_epist == 0 --> neutral effect (wild-type) | ||
# positive DeltaE_epist --> neutral/benign effect | ||
|
||
def find_matching_files(file_list, acc, pos): | ||
match_files = [] | ||
for fname in [f for f in file_list if f.startswith(acc)]: | ||
|
@@ -70,8 +75,8 @@ def find_matching_files(file_list, acc, pos): | |
with open(join(EVmut_dir, fname), 'r') as f: | ||
for line in f: | ||
if line.startswith(mutant): | ||
l = line.strip().split(';')[4:8] | ||
data.append(l) | ||
ll = line.strip().split(';')[4:8] | ||
data.append(ll) | ||
break | ||
data = np.array(data, dtype=float) | ||
if len(data) == 0: | ||
|
@@ -82,3 +87,10 @@ def find_matching_files(file_list, acc, pos): | |
|
||
LOGGER.report('EVmutation scores recovered in %.1fs.', '_EVmut') | ||
return features | ||
|
||
|
||
def calcEVmutPathClasses(EVmut_score):
    """Classify EVmutation scores as deleterious or neutral.

    The threshold is the negated ``'optimal cutoff'`` entry of the
    ``'EVmutation_metrics'`` dictionary stored in ``SETTINGS``.

    :arg EVmut_score: EVmutation score(s); a scalar, a sequence or an array
    :type EVmut_score: float, list, :class:`~numpy.ndarray`
    :return: ``'deleterious'`` where the score is below the threshold,
        ``'neutral'`` elsewhere and ``'?'`` where the score is NaN
    :rtype: :class:`~numpy.ndarray`
    """
    # coerce once so that plain lists are accepted as well as arrays
    scores = np.asarray(EVmut_score)
    c = -SETTINGS.get('EVmutation_metrics')['optimal cutoff']
    EVmut_class = np.where(scores < c, 'deleterious', 'neutral')
    # NaN compares False against any cutoff, so missing scores would be
    # labelled 'neutral' unless they are flagged explicitly
    EVmut_class[np.isnan(scores)] = '?'
    return EVmut_class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,16 +3,23 @@ | |
PDB-based structural and dynamical features in a single place, and a | ||
function for using the latter on a list of PDB SAV coordinates.""" | ||
|
||
import numpy as np | ||
import pickle | ||
import datetime | ||
import os | ||
from tqdm import tqdm | ||
from prody import Atomic, parsePDB, writePDB, LOGGER, SETTINGS | ||
from prody import GNM, ANM, calcSqFlucts | ||
from prody import calcPerturbResponse, calcMechStiff | ||
# from prody import calcMBS | ||
from prody import reduceModel, sliceModel | ||
from prody import execDSSP, parseDSSP | ||
import numpy as np | ||
import pickle | ||
import datetime | ||
import os | ||
|
||
__author__ = "Luca Ponzoni" | ||
__date__ = "December 2019" | ||
__maintainer__ = "Luca Ponzoni" | ||
__email__ = "[email protected]" | ||
__status__ = "Production" | ||
|
||
__all__ = ['STR_FEATS', 'DYN_FEATS', 'PDB_FEATS', | ||
'PDBfeatures', 'calcPDBfeatures'] | ||
|
@@ -209,6 +216,9 @@ def savePickle(self, folder=None, filename=None): | |
LOGGER.info("Pickle '{}' saved.".format(filename)) | ||
return pickle_path | ||
|
||
def resetTimestamp(self):
    """Set ``self.timestamp`` to the current UTC date and time, as a string."""
    # datetime.utcnow() is deprecated since Python 3.12; dropping tzinfo
    # from an aware UTC datetime yields the same naive value, so the
    # resulting string keeps the exact same format (no '+00:00' suffix)
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    self.timestamp = str(now)
|
||
def setNumModes(self, n_modes): | ||
"""Sets the number of ENM modes to be computed. If different from | ||
the number provided at instantiation, any precomputed features will | ||
|
@@ -643,7 +653,7 @@ def calcSelFeatures(self, chain='all', resid=None, sel_feats=None): | |
|
||
|
||
def calcPDBfeatures(mapped_SAVs, sel_feats=None, custom_PDB=None, | ||
refresh=False): | ||
refresh=False, status_file=None, status_prefix=None): | ||
LOGGER.info('Computing structural and dynamical features ' | ||
'from PDB structures...') | ||
LOGGER.timeit('_calcPDBFeats') | ||
|
@@ -662,24 +672,40 @@ def calcPDBfeatures(mapped_SAVs, sel_feats=None, custom_PDB=None, | |
else: | ||
# no need to sort when using a custom PDB or PDBID | ||
sorting_map = range(num_SAVs) | ||
# define how to report progress | ||
if status_prefix is None: | ||
status_prefix = '' | ||
bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]' | ||
if status_file is not None: | ||
status_file = open(status_file, 'w') | ||
progress_bar = tqdm( | ||
[(i, mapped_SAVs[i]) for i in sorting_map], file=status_file, | ||
bar_format=bar_format+'\n') | ||
else: | ||
progress_bar = tqdm( | ||
[(i, mapped_SAVs[i]) for i in sorting_map], bar_format=bar_format) | ||
cache = {'PDBID': None, 'chain': None, 'obj': None} | ||
count = 0 | ||
for indx, SAV in [(i, mapped_SAVs[i]) for i in sorting_map]: | ||
for indx, SAV in progress_bar: | ||
count += 1 | ||
if SAV['PDB size'] == 0: | ||
# SAV could not be mapped to PDB | ||
_features = np.nan | ||
SAV_coords = SAV['SAV coords'] | ||
LOGGER.info(f"[{count}/{num_SAVs}] SAV '{SAV_coords}' " | ||
"couldn't be mapped to PDB") | ||
progress_msg = f"{status_prefix}No PDB for SAV '{SAV_coords}'" | ||
else: | ||
parsed_PDB_coords = SAV['PDB SAV coords'].split() | ||
PDBID, chID = parsed_PDB_coords[:2] | ||
resid = int(parsed_PDB_coords[2]) | ||
LOGGER.info("[{}/{}] Analizing mutation site {}:{} {}..." | ||
.format(count, num_SAVs, PDBID, chID, resid)) | ||
progress_msg = status_prefix + \ | ||
f'Analizing mutation site {PDBID}:{chID} {resid}' | ||
# chID == "?" stands for "empty space" | ||
chID = " " if chID == "?" else chID | ||
# report progress | ||
# LOGGER.info(f"[{count}/{num_SAVs}] {progress_msg}...") | ||
progress_bar.set_description(progress_msg) | ||
# compute PDB features, if possible | ||
if SAV['PDB size'] != 0: | ||
if PDBID == cache['PDBID']: | ||
# use PDBfeatures instance from previous iteration | ||
obj = cache['obj'] | ||
|
@@ -725,4 +751,6 @@ def calcPDBfeatures(mapped_SAVs, sel_feats=None, custom_PDB=None, | |
and custom_PDB is None: | ||
cache['obj'].savePickle() | ||
LOGGER.report('PDB features have been computed in %.1fs.', '_calcPDBFeats') | ||
if status_file: | ||
os.remove(status_file.name) | ||
return features |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,10 +3,18 @@ | |
coevolution properties of an amino acid substitution from a Pfam | ||
multiple sequence alignment.""" | ||
|
||
import os | ||
import numpy as np | ||
from tqdm import tqdm | ||
from prody import LOGGER | ||
from .Uniprot import UniprotMapping | ||
|
||
__author__ = "Luca Ponzoni" | ||
__date__ = "December 2019" | ||
__maintainer__ = "Luca Ponzoni" | ||
__email__ = "[email protected]" | ||
__status__ = "Production" | ||
|
||
__all__ = ['PFAM_FEATS', 'calcPfamFeatures'] | ||
|
||
PFAM_FEATS = ['entropy', 'ranked_MI'] | ||
|
@@ -38,7 +46,7 @@ def calcNormRank(array, i): | |
return feats | ||
|
||
|
||
def calcPfamFeatures(SAVs): | ||
def calcPfamFeatures(SAVs, status_file=None, status_prefix=None): | ||
LOGGER.info('Computing sequence properties from Pfam domains...') | ||
LOGGER.timeit('_calcPfamFeats') | ||
# sort SAVs, so to group together those | ||
|
@@ -49,14 +57,29 @@ def calcPfamFeatures(SAVs): | |
num_SAVs = len(SAVs) | ||
feat_dtype = np.dtype([('entropy', 'f'), ('ranked_MI', 'f')]) | ||
features = np.zeros(num_SAVs, dtype=feat_dtype) | ||
# define how to report progress | ||
if status_prefix is None: | ||
status_prefix = '' | ||
bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]' | ||
if status_file is not None: | ||
status_file = open(status_file, 'w') | ||
progress_bar = tqdm( | ||
[(i, SAVs[i]) for i in sorting_map], file=status_file, | ||
bar_format=bar_format+'\n') | ||
else: | ||
progress_bar = tqdm( | ||
[(i, SAVs[i]) for i in sorting_map], bar_format=bar_format) | ||
# map to Pfam domains using UniprotMapping class | ||
cache = {'acc': None, 'obj': None, 'warn': ''} | ||
count = 0 | ||
for indx, SAV in [(i, SAVs[i]) for i in sorting_map]: | ||
for indx, SAV in progress_bar: | ||
count += 1 | ||
acc, pos, aa1, aa2 = SAV.split() | ||
pos = int(pos) | ||
LOGGER.info(f"[{count}/{num_SAVs}] Mapping SAV '{SAV}' to Pfam...") | ||
# report progress | ||
progress_msg = f"{status_prefix}Mapping SAV '{SAV}' to Pfam" | ||
# LOGGER.info(f"[{count}/{num_SAVs}] {progress_msg}...") | ||
progress_bar.set_description(progress_msg) | ||
# map to Pfam domains using 'UniprotMapping' class | ||
if acc == cache['acc']: | ||
# use object from previous iteration | ||
|
@@ -102,4 +125,6 @@ def calcPfamFeatures(SAVs): | |
cache['obj'].savePickle() | ||
LOGGER.report('SAVs have been mapped on Pfam domains and sequence ' | ||
'properties have been computed in %.1fs.', '_calcPfamFeats') | ||
if status_file: | ||
os.remove(status_file.name) | ||
return features |
Oops, something went wrong.