Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update to v.0.9.7 #11

Merged
merged 24 commits into from
Dec 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ before_install:
# install dependencies for rhapsody
- conda install requests numpy scikit-learn
- conda install matplotlib
- pip install tqdm

install:
# - python setup.py install
Expand All @@ -46,4 +47,5 @@ install:
script:
# - pytest test1.py
- cd test/test-1; python test-1.py
- cd ../test-2; python test-2.py

2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
include rhapsody/VERSION
include rhapsody/data.tar.gz
1 change: 0 additions & 1 deletion VERSION

This file was deleted.

26 changes: 26 additions & 0 deletions docs/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Example of documentation string:

function(arg, kwarg=None):
""" General description. Please use these syntax rules:

- :class:`~numpy.ndarray`
- :meth:`setSomething` or :meth:`.setSomething`
- :func:`someFunction` or :func:`.someFunction`
- **None**
You can insert refs too ([ref]_).

:arg arg: detailed description.
This is how you make a bullet list in an argument's description:

- if *arg* = **None** (default), blah blah
- if *arg* = ``'something else'``, blah blah
:type arg: str, list, bool, whatever
:arg kwarg: blah blah
:type kwarg: str
:return: something
:rtype: str

.. [ref] Ponzoni L, Bahar I. Structural dynamics is a determinant of
the functional significance of missense variants. *PNAS* **2018**
115 (16) 4164-4169.
"""
14 changes: 13 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', '_backup', 'Thumbs.db', '.DS_Store', '**tar.gz**']
exclude_patterns = ['_build', '_backup', 'Thumbs.db', '.DS_Store',
'**tar.gz**']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
Expand All @@ -60,6 +61,17 @@
# autosummary_generate = True


# functions for including __init__ documentation
def skip(app, what, name, obj, would_skip, options):
    """Handler for the ``autodoc-skip-member`` event that keeps
    ``__init__`` in the generated documentation.

    :arg name: name of the member being considered
    :type name: str
    :arg would_skip: skip decision that is passed through unchanged for any
        member other than ``__init__``
    :return: **False** when *name* is ``"__init__"``, otherwise *would_skip*

    The remaining arguments (*app*, *what*, *obj*, *options*) are accepted
    but not used.
    """
    return False if name == "__init__" else would_skip


def setup(app):
    """Registers :func:`skip` as the handler for the
    ``autodoc-skip-member`` event on *app*.

    :arg app: object exposing a ``connect(event, callback)`` method
        (presumably the Sphinx application, since this is ``docs/conf.py``)
    """
    event_name = "autodoc-skip-member"
    app.connect(event_name, skip)


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
Expand Down
8 changes: 8 additions & 0 deletions docs/rhapsody.utils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ rhapsody.utils package
Submodules
----------

rhapsody.utils.misc module
--------------------------

.. automodule:: rhapsody.utils.misc
:members:
:undoc-members:
:show-inheritance:

rhapsody.utils.settings module
------------------------------

Expand Down
1 change: 1 addition & 0 deletions rhapsody/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.9.7
8 changes: 7 additions & 1 deletion rhapsody/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,17 @@
from os import path

here = path.abspath(path.dirname(__file__))
with open(path.join(here, '../VERSION'), encoding='utf-8') as f:
with open(path.join(here, 'VERSION'), encoding='utf-8') as f:
__version__ = f.read()

__release__ = __version__

__author__ = "Luca Ponzoni"
__date__ = "December 2019"
__maintainer__ = "Luca Ponzoni"
__email__ = "[email protected]"
__status__ = "Production"

__all__ = []

from . import utils
Expand Down
6 changes: 6 additions & 0 deletions rhapsody/features/BLOSUM.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
import numpy as np
from Bio.SubsMat.MatrixInfo import blosum62

__author__ = "Luca Ponzoni"
__date__ = "December 2019"
__maintainer__ = "Luca Ponzoni"
__email__ = "[email protected]"
__status__ = "Production"

__all__ = ['BLOSUM_FEATS', 'calcBLOSUMfeatures']

BLOSUM_FEATS = ['BLOSUM']
Expand Down
28 changes: 20 additions & 8 deletions rhapsody/features/EVmutation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@
from os.path import splitext, join, basename
from prody import SETTINGS, LOGGER

# extract precomputed EVmutation scores for given mutants
# NB:
# negative DeltaE_epist --> deleterious effect
# DeltaE_epist == 0 --> neutral effect (wild-type)
# positive DeltaE_epist --> neutral/benign effect

__author__ = "Luca Ponzoni"
__date__ = "December 2019"
__maintainer__ = "Luca Ponzoni"
__email__ = "[email protected]"
__status__ = "Production"

__all__ = ['EVMUT_FEATS', 'recoverEVmutFeatures']

Expand All @@ -35,6 +34,12 @@ def recoverEVmutFeatures(SAVs):
LOGGER.timeit('_EVmut')
LOGGER.info('Recovering EVmutation data...')

# extracts precomputed EVmutation scores for given mutants
# NB:
# negative DeltaE_epist --> deleterious effect
# DeltaE_epist == 0 --> neutral effect (wild-type)
# positive DeltaE_epist --> neutral/benign effect

def find_matching_files(file_list, acc, pos):
match_files = []
for fname in [f for f in file_list if f.startswith(acc)]:
Expand Down Expand Up @@ -70,8 +75,8 @@ def find_matching_files(file_list, acc, pos):
with open(join(EVmut_dir, fname), 'r') as f:
for line in f:
if line.startswith(mutant):
l = line.strip().split(';')[4:8]
data.append(l)
ll = line.strip().split(';')[4:8]
data.append(ll)
break
data = np.array(data, dtype=float)
if len(data) == 0:
Expand All @@ -82,3 +87,10 @@ def find_matching_files(file_list, acc, pos):

LOGGER.report('EVmutation scores recovered in %.1fs.', '_EVmut')
return features


def calcEVmutPathClasses(EVmut_score):
    """Converts EVmutation scores into pathogenicity class labels.

    The threshold is the negative of the ``'optimal cutoff'`` entry found
    in the ``'EVmutation_metrics'`` setting.

    :arg EVmut_score: EVmutation scores (assumed to be a numeric array,
        since it is compared elementwise and passed to :func:`numpy.isnan`)
    :return: array of labels: ``'deleterious'`` where the score is strictly
        below the threshold, ``'neutral'`` elsewhere, and ``'?'`` where the
        score is NaN
    :rtype: :class:`~numpy.ndarray`
    """
    threshold = -SETTINGS.get('EVmutation_metrics')['optimal cutoff']
    below_threshold = EVmut_score < threshold
    labels = np.where(below_threshold, 'deleterious', 'neutral')
    # NaN scores compare as "not below", so they must be relabelled here
    missing = np.isnan(EVmut_score)
    labels[missing] = '?'
    return labels
48 changes: 38 additions & 10 deletions rhapsody/features/PDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,23 @@
PDB-based structural and dynamical features in a single place, and a
function for using the latter on a list of PDB SAV coordinates."""

import numpy as np
import pickle
import datetime
import os
from tqdm import tqdm
from prody import Atomic, parsePDB, writePDB, LOGGER, SETTINGS
from prody import GNM, ANM, calcSqFlucts
from prody import calcPerturbResponse, calcMechStiff
# from prody import calcMBS
from prody import reduceModel, sliceModel
from prody import execDSSP, parseDSSP
import numpy as np
import pickle
import datetime
import os

__author__ = "Luca Ponzoni"
__date__ = "December 2019"
__maintainer__ = "Luca Ponzoni"
__email__ = "[email protected]"
__status__ = "Production"

__all__ = ['STR_FEATS', 'DYN_FEATS', 'PDB_FEATS',
'PDBfeatures', 'calcPDBfeatures']
Expand Down Expand Up @@ -209,6 +216,9 @@ def savePickle(self, folder=None, filename=None):
LOGGER.info("Pickle '{}' saved.".format(filename))
return pickle_path

def resetTimestamp(self):
    """Overwrites ``self.timestamp`` with the current UTC time, stored
    as the string form of a naive :class:`datetime.datetime`."""
    now_utc = datetime.datetime.utcnow()
    self.timestamp = str(now_utc)

def setNumModes(self, n_modes):
"""Sets the number of ENM modes to be computed. If different from
the number provided at instantiation, any precomputed features will
Expand Down Expand Up @@ -643,7 +653,7 @@ def calcSelFeatures(self, chain='all', resid=None, sel_feats=None):


def calcPDBfeatures(mapped_SAVs, sel_feats=None, custom_PDB=None,
refresh=False):
refresh=False, status_file=None, status_prefix=None):
LOGGER.info('Computing structural and dynamical features '
'from PDB structures...')
LOGGER.timeit('_calcPDBFeats')
Expand All @@ -662,24 +672,40 @@ def calcPDBfeatures(mapped_SAVs, sel_feats=None, custom_PDB=None,
else:
# no need to sort when using a custom PDB or PDBID
sorting_map = range(num_SAVs)
# define how to report progress
if status_prefix is None:
status_prefix = ''
bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]'
if status_file is not None:
status_file = open(status_file, 'w')
progress_bar = tqdm(
[(i, mapped_SAVs[i]) for i in sorting_map], file=status_file,
bar_format=bar_format+'\n')
else:
progress_bar = tqdm(
[(i, mapped_SAVs[i]) for i in sorting_map], bar_format=bar_format)
cache = {'PDBID': None, 'chain': None, 'obj': None}
count = 0
for indx, SAV in [(i, mapped_SAVs[i]) for i in sorting_map]:
for indx, SAV in progress_bar:
count += 1
if SAV['PDB size'] == 0:
# SAV could not be mapped to PDB
_features = np.nan
SAV_coords = SAV['SAV coords']
LOGGER.info(f"[{count}/{num_SAVs}] SAV '{SAV_coords}' "
"couldn't be mapped to PDB")
progress_msg = f"{status_prefix}No PDB for SAV '{SAV_coords}'"
else:
parsed_PDB_coords = SAV['PDB SAV coords'].split()
PDBID, chID = parsed_PDB_coords[:2]
resid = int(parsed_PDB_coords[2])
LOGGER.info("[{}/{}] Analizing mutation site {}:{} {}..."
.format(count, num_SAVs, PDBID, chID, resid))
progress_msg = status_prefix + \
f'Analizing mutation site {PDBID}:{chID} {resid}'
# chID == "?" stands for "empty space"
chID = " " if chID == "?" else chID
# report progress
# LOGGER.info(f"[{count}/{num_SAVs}] {progress_msg}...")
progress_bar.set_description(progress_msg)
# compute PDB features, if possible
if SAV['PDB size'] != 0:
if PDBID == cache['PDBID']:
# use PDBfeatures instance from previous iteration
obj = cache['obj']
Expand Down Expand Up @@ -725,4 +751,6 @@ def calcPDBfeatures(mapped_SAVs, sel_feats=None, custom_PDB=None,
and custom_PDB is None:
cache['obj'].savePickle()
LOGGER.report('PDB features have been computed in %.1fs.', '_calcPDBFeats')
if status_file:
os.remove(status_file.name)
return features
31 changes: 28 additions & 3 deletions rhapsody/features/Pfam.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,18 @@
coevolution properties of an amino acid substitution from a Pfam
multiple sequence alignment."""

import os
import numpy as np
from tqdm import tqdm
from prody import LOGGER
from .Uniprot import UniprotMapping

__author__ = "Luca Ponzoni"
__date__ = "December 2019"
__maintainer__ = "Luca Ponzoni"
__email__ = "[email protected]"
__status__ = "Production"

__all__ = ['PFAM_FEATS', 'calcPfamFeatures']

PFAM_FEATS = ['entropy', 'ranked_MI']
Expand Down Expand Up @@ -38,7 +46,7 @@ def calcNormRank(array, i):
return feats


def calcPfamFeatures(SAVs):
def calcPfamFeatures(SAVs, status_file=None, status_prefix=None):
LOGGER.info('Computing sequence properties from Pfam domains...')
LOGGER.timeit('_calcPfamFeats')
# sort SAVs, so to group together those
Expand All @@ -49,14 +57,29 @@ def calcPfamFeatures(SAVs):
num_SAVs = len(SAVs)
feat_dtype = np.dtype([('entropy', 'f'), ('ranked_MI', 'f')])
features = np.zeros(num_SAVs, dtype=feat_dtype)
# define how to report progress
if status_prefix is None:
status_prefix = ''
bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]'
if status_file is not None:
status_file = open(status_file, 'w')
progress_bar = tqdm(
[(i, SAVs[i]) for i in sorting_map], file=status_file,
bar_format=bar_format+'\n')
else:
progress_bar = tqdm(
[(i, SAVs[i]) for i in sorting_map], bar_format=bar_format)
# map to Pfam domains using UniprotMapping class
cache = {'acc': None, 'obj': None, 'warn': ''}
count = 0
for indx, SAV in [(i, SAVs[i]) for i in sorting_map]:
for indx, SAV in progress_bar:
count += 1
acc, pos, aa1, aa2 = SAV.split()
pos = int(pos)
LOGGER.info(f"[{count}/{num_SAVs}] Mapping SAV '{SAV}' to Pfam...")
# report progress
progress_msg = f"{status_prefix}Mapping SAV '{SAV}' to Pfam"
# LOGGER.info(f"[{count}/{num_SAVs}] {progress_msg}...")
progress_bar.set_description(progress_msg)
# map to Pfam domains using 'UniprotMapping' class
if acc == cache['acc']:
# use object from previous iteration
Expand Down Expand Up @@ -102,4 +125,6 @@ def calcPfamFeatures(SAVs):
cache['obj'].savePickle()
LOGGER.report('SAVs have been mapped on Pfam domains and sequence '
'properties have been computed in %.1fs.', '_calcPfamFeats')
if status_file:
os.remove(status_file.name)
return features
Loading