-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Concurrence #28
Concurrence #28
Changes from 19 commits
049f147
c3d5a0f
c0a278c
d215016
16bb06b
5d9a440
1ab3d51
bac1610
42f904a
c758fe9
bba923f
1fef87a
7275242
e874747
fa47838
30ecd46
f31130e
9c9987b
5d8184d
b84283e
cdfd277
a41bbb1
1d59768
64d3deb
626456a
91e3092
24f9928
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,5 +8,6 @@ omit = | |
*/_version.py | ||
exclude_lines = | ||
pragma: no cover | ||
-no-cov- | ||
def __repr__ | ||
raise NotImplementedError |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,246 @@ | ||
import itertools | ||
import mdtraj as md | ||
import numpy as np | ||
|
||
try: | ||
import matplotlib.pyplot as plt | ||
except ImportError: | ||
HAS_MATPLOTLIB = False | ||
else: | ||
HAS_MATPLOTLIB = True | ||
|
||
|
||
class Concurrence(object): | ||
"""Superclass for contact concurrence objects. | ||
|
||
Contact concurrences measure what contacts occur simultaneously in a | ||
trajectory. When defining states, one usually wants to characterize | ||
based on multiple contacts that are made simultaneously; contact | ||
concurrences makes it easier to identify those. | ||
|
||
Parameters | ||
---------- | ||
values : list of list of bool | ||
the whether a contact is present for each contact pair at each | ||
point in time; outer list is length number of frames, inner list | ||
in length number of (included) contacts | ||
labels : list of string | ||
labels for each contact pair | ||
""" | ||
def __init__(self, values, labels=None): | ||
self.values = values | ||
self.labels = labels | ||
|
||
# @property | ||
# def lifetimes(self): | ||
# pass | ||
|
||
def set_labels(self, labels): | ||
"""Set the contact labels | ||
|
||
Parameters | ||
---------- | ||
labels : list of string | ||
labels for each contact pair | ||
""" | ||
self.labels = labels | ||
|
||
def __getitem__(self, label): | ||
idx = self.labels.index(label) | ||
return self.values[idx] | ||
|
||
# temporarily removed until we find a good metric here; this metric did | ||
# not seem optimal and I stopped using it, so remove from code before | ||
# release (can add back in later) | ||
# def coincidence(self, label_list): | ||
# this_list = np.asarray(self[label_list[0]]) | ||
# coincidence_list = this_list | ||
# norm_sq = sum(this_list) | ||
# for label in label_list[1:]: | ||
# this_list = np.asarray(self[label]) | ||
# coincidence_list &= this_list | ||
# norm_sq *= sum(this_list) | ||
|
||
# return sum(coincidence_list) / np.sqrt(norm_sq) | ||
|
||
|
||
class AtomContactConcurrence(Concurrence): | ||
"""Contact concurrences for atom contacts. | ||
|
||
Parameters | ||
---------- | ||
trajectory : :class:`mdtraj.Trajectory` | ||
the trajectory to analyze | ||
atom_contacts : list | ||
output from ``contact_map.atom_contacts.most_common()`` | ||
cutoff : float | ||
cutoff, in nm. Should be the same as used in the contact map. | ||
""" | ||
def __init__(self, trajectory, atom_contacts, cutoff=0.45): | ||
# TODO: the use of atom_contacts as input from most_common is weird | ||
atom_pairs = [[contact[0][0].index, contact[0][1].index] | ||
for contact in atom_contacts] | ||
labels = [str(contact[0]) for contact in atom_contacts] | ||
distances = md.compute_distances(trajectory, atom_pairs=atom_pairs) | ||
vector_f = np.vectorize(lambda d: d < cutoff) | ||
values = list(map(list, zip(*vector_f(distances)))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please add a comment for this magic There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and this could be changed into: |
||
super(AtomContactConcurrence, self).__init__(values=values, | ||
labels=labels) | ||
|
||
|
||
class ResidueContactConcurrence(Concurrence): | ||
"""Contact concurrences for residue contacts. | ||
|
||
Parameters | ||
---------- | ||
trajectory : :class:`mdtraj.Trajectory` | ||
the trajectory to analyze | ||
residue_contacts : list | ||
output from ``contact_map.residue_contacts.most_common()`` | ||
cutoff : float | ||
cutoff, in nm. Should be the same as used in the contact map. | ||
select : string | ||
additional atom selection string for MDTraj; defaults to "and symbol | ||
!= 'H'" | ||
""" | ||
def __init__(self, trajectory, residue_contacts, cutoff=0.45, | ||
select="and symbol != 'H'"): | ||
# TODO: the use of residue_contacts as input from most_common is weird | ||
residue_pairs = [[contact[0][0], contact[0][1]] | ||
for contact in residue_contacts] | ||
labels = [str(contact[0]) for contact in residue_contacts] | ||
values = [] | ||
select_residue = lambda idx: trajectory.topology.select( | ||
"resid " + str(idx) + " " + select | ||
) | ||
for res_A, res_B in residue_pairs: | ||
atoms_A = select_residue(res_A.index) | ||
atoms_B = select_residue(res_B.index) | ||
atom_pairs = itertools.product(atoms_A, atoms_B) | ||
distances = md.compute_distances(trajectory, | ||
atom_pairs=atom_pairs) | ||
min_dists = [min(dists) for dists in distances] | ||
values.append(list(map(lambda d: d < cutoff, min_dists))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. list comprehension should be more readable |
||
|
||
super(ResidueContactConcurrence, self).__init__(values=values, | ||
labels=labels) | ||
|
||
|
||
class ConcurrencePlotter(object): | ||
"""Plot manager for contact concurrences. | ||
|
||
Parameters | ||
---------- | ||
concurrence : :class:`.Concurrence` | ||
concurrence to plot; default None allows to override later | ||
labels : list of string | ||
labels for the contact pairs, default None will use concurrence | ||
labels if available, integers if not | ||
x_values : list of numeric | ||
values to use for the time axis; default None uses integers starting | ||
at 0 (can be used to assign the actual simulation time to the | ||
x-axis) | ||
""" | ||
def __init__(self, concurrence=None, labels=None, x_values=None): | ||
self.concurrence = concurrence | ||
self.labels = self.get_concurrence_labels(concurrence, labels) | ||
self.x_values = x_values | ||
|
||
@staticmethod | ||
def get_concurrence_labels(concurrence, labels=None): | ||
"""Extract labels for contact from a concurrence object | ||
|
||
If ``labels`` is given, that is returned. Otherwise, the | ||
``concurrence`` is checked for labels, and those are used. If those | ||
are also not available, string forms of integers starting with 0 are | ||
returned. | ||
|
||
|
||
Parameters | ||
---------- | ||
concurrence : :class:`.Concurrence` | ||
concurrence, which may have label information | ||
labels : list of string | ||
labels to use for contacts (optional) | ||
|
||
Returns | ||
------- | ||
list of string | ||
labels to use for contacts | ||
""" | ||
if labels is None: | ||
if concurrence and concurrence.labels is not None: | ||
labels = concurrence.labels | ||
else: | ||
labels = [str(i) for i in range(len(concurrence.values))] | ||
return labels | ||
|
||
@property | ||
def x_values(self): | ||
"""list : values to use for the x-axis (time)""" | ||
x_values = self._x_values | ||
if x_values is None: | ||
x_values = list(range(len(self.concurrence.values[0]))) | ||
return x_values | ||
|
||
@x_values.setter | ||
def x_values(self, x_values): | ||
self._x_values = x_values | ||
|
||
def plot(self, concurrence=None): | ||
"""Contact concurrence plot based on matplotlib | ||
|
||
Parameters | ||
---------- | ||
concurrence : :class:`.Concurrence` | ||
optional; default None uses ``self.concurrence``; this allows | ||
one to override the use of ``self.concurrence`` | ||
|
||
Returns | ||
------- | ||
fig : :class:`.matplotlib.Figure` | ||
ax : :class:`.matplotlib.Axes` | ||
lgd: :class:`.matplotlib.legend.Legend` | ||
objects for matplotlib-based plot of contact concurrences | ||
""" | ||
if not HAS_MATPLOTLIB: # pragma: no cover | ||
raise ImportError("matplotlib not installed") | ||
if concurrence is None: | ||
concurrence = self.concurrence | ||
labels = self.get_concurrence_labels(concurrence=concurrence) | ||
x_values = self.x_values | ||
|
||
fig = plt.figure(1) | ||
ax = fig.add_subplot(111) | ||
|
||
y_val = -1.0 | ||
for label, val_set in zip(labels, concurrence.values): | ||
x_vals = [x for (x, y) in zip(x_values, val_set) if y] | ||
ax.plot(x_vals, [y_val] * len(x_vals), '.', markersize=1, | ||
label=label) | ||
y_val -= 1.0 | ||
|
||
ax.set_ylim(top=0.0) | ||
ax.set_xlim(left=min(x_values), right=max(x_values)) | ||
lgd = ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) | ||
return (fig, ax, lgd) | ||
|
||
|
||
def plot_concurrence(concurrence, labels=None, x_values=None): # -no-cov- | ||
""" | ||
Convenience function for concurrence plots. | ||
|
||
Parameters | ||
---------- | ||
concurrence : :class:`.Concurrence` | ||
concurrence to be plotted | ||
labels: list of string | ||
labels for contacts (optional) | ||
x_values : list of float or list of int | ||
values to use for the x-axis | ||
|
||
See also | ||
-------- | ||
:class:`.ConcurrencePlotter` | ||
""" | ||
return ConcurrencePlotter(concurrence, labels, x_values).plot() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
REMARK 225 HAND-WRITTEN TEST TRAJECTORY | ||
REMARK 225 COMMENTS HERE COUNT FROM 1 (CODE COUNTS FROM 0, PDB FROM 1) | ||
REMARK 225 SYSTEM CONSISTS OF TWO CHAINS: A TWO-RESIDUE "PROTEIN" (CHAIN A) | ||
REMARK 225 AND A "LIGAND" (CHAIN B). | ||
REMARK 225 IN FRAMES 1, 2 AND 5, THE PROTEIN IS IN THE SAME CONFIGURATION | ||
REMARK 225 * FRAME 1 | ||
REMARK 225 * FRAME 2 | ||
REMARK 225 * FRAME 3 | ||
REMARK 225 * FRAME 4 | ||
REMARK 225 * FRAME 5 | ||
REMARK 225 | ||
CRYST1 25.000 25.000 25.000 90.00 90.00 90.00 P 1 1 | ||
HETATM 1 C1 AAA A 1 0.250 2.250 0.000 1.00 0.00 C | ||
HETATM 2 C2 AAA A 1 0.250 1.750 0.000 1.00 0.00 C | ||
HETATM 3 H AAA A 1 0.250 2.750 0.000 1.00 0.00 H | ||
HETATM 4 C1 BBB A 2 0.250 0.750 0.000 1.00 0.00 C | ||
HETATM 5 C2 BBB A 2 0.250 1.250 0.000 1.00 0.00 C | ||
HETATM 6 H BBB A 2 0.250 0.250 0.000 1.00 0.00 H | ||
HETATM 7 C1 LLL B 3 0.750 1.750 0.000 1.00 0.00 C | ||
HETATM 8 C2 LLL B 3 0.750 1.250 0.000 1.00 0.00 C | ||
HETATM 9 H LLL B 3 1.250 1.750 0.000 1.00 0.00 H | ||
ENDMDL | ||
CRYST1 25.000 25.000 25.000 90.00 90.00 90.00 P 1 1 | ||
HETATM 1 C1 AAA A 1 0.250 2.250 0.000 1.00 0.00 C | ||
HETATM 2 C2 AAA A 1 0.250 1.750 0.000 1.00 0.00 C | ||
HETATM 3 H AAA A 1 0.250 2.750 0.000 1.00 0.00 H | ||
HETATM 4 C1 BBB A 2 0.250 0.750 0.000 1.00 0.00 C | ||
HETATM 5 C2 BBB A 2 0.250 1.250 0.000 1.00 0.00 C | ||
HETATM 6 H BBB A 2 0.250 0.250 0.000 1.00 0.00 H | ||
HETATM 7 C1 LLL B 3 0.750 2.250 0.000 1.00 0.00 C | ||
HETATM 8 C2 LLL B 3 0.750 1.750 0.000 1.00 0.00 C | ||
HETATM 9 H LLL B 3 1.250 2.250 0.000 1.00 0.00 H | ||
ENDMDL | ||
CRYST1 25.000 25.000 25.000 90.00 90.00 90.00 P 1 1 | ||
HETATM 1 C1 AAA A 1 0.250 2.250 0.000 1.00 0.00 C | ||
HETATM 2 C2 AAA A 1 0.250 1.750 0.000 1.00 0.00 C | ||
HETATM 3 H AAA A 1 0.750 2.250 0.000 1.00 0.00 H | ||
HETATM 4 C1 BBB A 2 0.250 0.750 0.000 1.00 0.00 C | ||
HETATM 5 C2 BBB A 2 0.250 1.250 0.000 1.00 0.00 C | ||
HETATM 6 H BBB A 2 0.250 0.250 0.000 1.00 0.00 H | ||
HETATM 7 C1 LLL B 3 1.750 2.250 0.000 1.00 0.00 C | ||
HETATM 8 C2 LLL B 3 1.750 1.750 0.000 1.00 0.00 C | ||
HETATM 9 H LLL B 3 1.250 2.250 0.000 1.00 0.00 H | ||
ENDMDL | ||
CRYST1 25.000 25.000 25.000 90.00 90.00 90.00 P 1 1 | ||
HETATM 1 C1 AAA A 1 0.250 2.250 0.000 1.00 0.00 C | ||
HETATM 2 C2 AAA A 1 0.250 1.750 0.000 1.00 0.00 C | ||
HETATM 3 H AAA A 1 0.750 2.250 0.000 1.00 0.00 H | ||
HETATM 4 C1 BBB A 2 1.250 1.250 0.000 1.00 0.00 C | ||
HETATM 5 C2 BBB A 2 0.750 1.250 0.000 1.00 0.00 C | ||
HETATM 6 H BBB A 2 1.750 1.250 0.000 1.00 0.00 H | ||
HETATM 7 C1 LLL B 3 1.750 2.250 0.000 1.00 0.00 C | ||
HETATM 8 C2 LLL B 3 1.750 1.750 0.000 1.00 0.00 C | ||
HETATM 9 H LLL B 3 1.250 2.250 0.000 1.00 0.00 H | ||
ENDMDL | ||
CRYST1 25.000 25.000 25.000 90.00 90.00 90.00 P 1 1 | ||
HETATM 1 C1 AAA A 1 0.250 2.250 0.000 1.00 0.00 C | ||
HETATM 2 C2 AAA A 1 0.250 1.750 0.000 1.00 0.00 C | ||
HETATM 3 H AAA A 1 0.250 2.750 0.000 1.00 0.00 H | ||
HETATM 4 C1 BBB A 2 0.250 0.750 0.000 1.00 0.00 C | ||
HETATM 5 C2 BBB A 2 0.250 1.250 0.000 1.00 0.00 C | ||
HETATM 6 H BBB A 2 0.250 0.250 0.000 1.00 0.00 H | ||
HETATM 7 C1 LLL B 3 1.250 2.250 0.000 1.00 0.00 C | ||
HETATM 8 C2 LLL B 3 1.250 1.750 0.000 1.00 0.00 C | ||
HETATM 9 H LLL B 3 1.250 2.750 0.000 1.00 0.00 H | ||
ENDMDL | ||
END |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is the other way around outer list is length contact pair and inner list is frame