From 9b5d8d06a759136a79278f4549cd4522baceb715 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 1 Jun 2020 21:16:01 +1000 Subject: [PATCH 1/4] select dihedrals faster --- package/MDAnalysis/analysis/dihedrals.py | 111 ++++++++++++++------ package/MDAnalysis/core/topologyattrs.py | 123 ++++++++++++++++------- 2 files changed, 170 insertions(+), 64 deletions(-) diff --git a/package/MDAnalysis/analysis/dihedrals.py b/package/MDAnalysis/analysis/dihedrals.py index 8cb0a50be7a..8966ba30278 100644 --- a/package/MDAnalysis/analysis/dihedrals.py +++ b/package/MDAnalysis/analysis/dihedrals.py @@ -209,6 +209,7 @@ class Dihedral(AnalysisBase): it must be given as a list of one atomgroup. """ + def __init__(self, atomgroups, **kwargs): """Parameters ---------- @@ -221,7 +222,8 @@ def __init__(self, atomgroups, **kwargs): If any atomgroups do not contain 4 atoms """ - super(Dihedral, self).__init__(atomgroups[0].universe.trajectory, **kwargs) + super(Dihedral, self).__init__( + atomgroups[0].universe.trajectory, **kwargs) self.atomgroups = atomgroups if any([len(ag) != 4 for ag in atomgroups]): @@ -261,7 +263,9 @@ class Ramachandran(AnalysisBase): selection cannot be made, that residue will be removed from the analysis. """ - def __init__(self, atomgroup, **kwargs): + + def __init__(self, atomgroup, c_name='C', n_name='N', ca_name='CA', + **kwargs): """Parameters ---------- atomgroup : AtomGroup or ResidueGroup @@ -274,7 +278,8 @@ def __init__(self, atomgroup, **kwargs): If the selection of residues is not contained within the protein """ - super(Ramachandran, self).__init__(atomgroup.universe.trajectory, **kwargs) + super(Ramachandran, self).__init__( + atomgroup.universe.trajectory, **kwargs) self.atomgroup = atomgroup residues = self.atomgroup.residues protein = self.atomgroup.universe.select_atoms("protein").residues @@ -288,26 +293,71 @@ def __init__(self, atomgroup, **kwargs): "or last residues") residues = residues.difference(protein[[0, -1]]) - phi_sel = [res.phi_selection() for res in residues] - psi_sel = [res.psi_selection() for res in residues] - # phi_selection() and psi_selection() currently can't handle topologies - # with an altloc attribute so this removes any residues that have either - # angle return none instead of a value - if any(sel is None for sel in phi_sel): - warnings.warn("Some residues in selection do not have phi selections") - remove = [i for i, sel in enumerate(phi_sel) if sel is None] - phi_sel = [sel for i, sel in enumerate(phi_sel) if i not in remove] - psi_sel = [sel for i, sel in enumerate(psi_sel) if i not in remove] - if any(sel is None for sel in psi_sel): - warnings.warn("Some residues in selection do not have psi selections") - remove = [i for i, sel in enumerate(psi_sel) if sel is None] - phi_sel = [sel for i, sel in enumerate(phi_sel) if i not in remove] - psi_sel = [sel for i, sel in enumerate(psi_sel) if i not in remove] - self.ag1 = mda.AtomGroup([atoms[0] for atoms in phi_sel]) - self.ag2 = mda.AtomGroup([atoms[1] for atoms in phi_sel]) - self.ag3 = mda.AtomGroup([atoms[2] for atoms in phi_sel]) - self.ag4 = mda.AtomGroup([atoms[3] for atoms in phi_sel]) - self.ag5 = mda.AtomGroup([atoms[3] for atoms in psi_sel]) + # find previous/next residues + u = residues[0].universe + nxt = u.residues[residues.ix[:-1]+1] # residues is ordered + if residues[-1].ix != len(u.residues): + nxt += u.residues[residues[-1].ix+1] + else: + residues = residues[:-1] + prev = u.residues[residues.ix-1] + psid = residues.segids + prid = residues.resids-1 + nrid = residues.resids+1 + sel = 'segid {} and resid {}' + + # delete wrong ones + pix = np.where((prev.segids != psid) | (prev.resids != prid))[0] + nix = np.where((nxt.segids != psid) | (nxt.resids != nrid))[0] + delete = [] # probably better way to do this + if len(pix): + prevls = list(prev) + for s, r, p in zip(psid[pix], prid[pix], pix): + try: + prevls[p] = u.select_atoms(sel.format(s, r)).residues[0] + except IndexError: + delete.append(p) + prev = sum(prevls) + + if len(nix): + nxtls = list(nxt) + for s, r, n in zip(psid[nix], nrid[nix], nix): + try: + nxtls[n] = u.select_atoms(sel.format(s, r)).residues[0] + except IndexError: + delete.append(n) + nxt = sum(nxtls) + + if len(delete): + warnings.warn("Some residues in selection do not have " + "phi or psi selections") + keep = np.ones_like(residues, dtype=bool) + keep[delete] = False + prev = prev[keep] + nxt = nxt[keep] + residues = residues[keep] + + + # find n, c, ca + keep_prev = [sum(r.atoms.names==c_name)==1 for r in prev] + rnames = [n_name, c_name, ca_name] + keep_res = [all(sum(r.atoms.names==n)==1 for n in rnames) + for r in residues] + keep_next = [sum(r.atoms.names==n_name)==1 for r in nxt] + + # alright we'll keep these + keep = np.array(keep_prev) & np.array(keep_res) & np.array(keep_next) + prev = prev[keep] + res = residues[keep] + nxt = nxt[keep] + + rnames = res.atoms.names + self.ag1 = prev.atoms[prev.atoms.names == c_name] + self.ag2 = res.atoms[rnames == n_name] + self.ag3 = res.atoms[rnames == ca_name] + self.ag4 = res.atoms[rnames == c_name] + self.ag5 = nxt.atoms[nxt.atoms.names == n_name] + def _prepare(self): self.angles = [] @@ -325,7 +375,6 @@ def _single_frame(self): def _conclude(self): self.angles = np.rad2deg(np.array(self.angles)) - def plot(self, ax=None, ref=False, **kwargs): """Plots data into standard ramachandran plot. Each time step in :attr:`Ramachandran.angles` is plotted onto the same graph. @@ -348,20 +397,22 @@ def plot(self, ax=None, ref=False, **kwargs): """ if ax is None: ax = plt.gca() - ax.axis([-180,180,-180,180]) + ax.axis([-180, 180, -180, 180]) ax.axhline(0, color='k', lw=1) ax.axvline(0, color='k', lw=1) ax.set(xticks=range(-180, 181, 60), yticks=range(-180, 181, 60), xlabel=r"$\phi$ (deg)", ylabel=r"$\psi$ (deg)") if ref == True: - X, Y = np.meshgrid(np.arange(-180, 180, 4), np.arange(-180, 180, 4)) + X, Y = np.meshgrid(np.arange(-180, 180, 4), + np.arange(-180, 180, 4)) levels = [1, 17, 15000] colors = ['#A1D4FF', '#35A1FF'] ax.contourf(X, Y, np.load(Rama_ref), levels=levels, colors=colors) a = self.angles.reshape(np.prod(self.angles.shape[:2]), 2) - ax.scatter(a[:,0], a[:,1], **kwargs) + ax.scatter(a[:, 0], a[:, 1], **kwargs) return ax + class Janin(Ramachandran): """Calculate :math:`\chi_1` and :math:`\chi_2` dihedral angles of selected residues. @@ -380,6 +431,7 @@ class Janin(Ramachandran): selection and must be removed. """ + def __init__(self, atomgroup, **kwargs): """Parameters ---------- @@ -397,7 +449,8 @@ def __init__(self, atomgroup, **kwargs): selection, usually due to missing atoms or alternative locations """ - super(Ramachandran, self).__init__(atomgroup.universe.trajectory, **kwargs) + super(Ramachandran, self).__init__( + atomgroup.universe.trajectory, **kwargs) self.atomgroup = atomgroup residues = atomgroup.residues protein = atomgroup.universe.select_atoms("protein").residues @@ -463,5 +516,5 @@ def plot(self, ax=None, ref=False, **kwargs): colors = ['#A1D4FF', '#35A1FF'] ax.contourf(X, Y, np.load(Janin_ref), levels=levels, colors=colors) a = self.angles.reshape(np.prod(self.angles.shape[:2]), 2) - ax.scatter(a[:,0], a[:,1], **kwargs) + ax.scatter(a[:, 0], a[:, 1], **kwargs) return ax diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index c805be3734d..c6451f515c2 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -490,19 +490,30 @@ def phi_selection(residue): 4-atom selection in the correct order. If no C' found in the previous residue (by resid) then this method returns ``None``. """ - # TODO: maybe this can be reformulated into one selection string without - # the additions later - sel_str = "segid {} and resid {} and name C".format( - residue.segment.segid, residue.resid - 1) - sel = (residue.universe.select_atoms(sel_str) + - residue.atoms.select_atoms('name N', 'name CA', 'name C')) - - # select_atoms doesnt raise errors if nothing found, so check size - if len(sel) == 4: - return sel - else: + # fnmatch is expensive. try the obv candidate first + prev = residue.universe.residues[residue.ix-1] + sid = residue.segment.segid + rid = residue.resid-1 + if not (prev.segment.segid == sid and prev.resid == rid): + sel = 'segid {} and resid {}'.format(sid, rid) + try: + prev = residue.universe.select_atoms(sel).residues[0] + except IndexError: + return None + c_ = prev.atoms[prev.atoms.names == 'C'] + if not c_: return None + # ncac = residue.atoms.select_atoms('name N', 'name CA', 'name C') + ncac = residue.atoms[np.in1d(residue.atoms.names, ['N', 'CA', 'C'])] + if len(ncac) != 3: + return None + + # sel = c_+ncac + names = ncac.names + sel = c_+ncac[names == 'N']+ncac[names == 'CA']+ncac[names == 'C'] + return sel + transplants[Residue].append(('phi_selection', phi_selection)) def psi_selection(residue): @@ -515,17 +526,39 @@ def psi_selection(residue): 4-atom selection in the correct order. If no N' found in the following residue (by resid) then this method returns ``None``. """ - sel_str = "segid {} and resid {} and name N".format( - residue.segment.segid, residue.resid + 1) - sel = (residue.atoms.select_atoms('name N', 'name CA', 'name C') + - residue.universe.select_atoms(sel_str)) - - if len(sel) == 4: - return sel + # fnmatch is expensive. try the obv candidate first + _manual_sel = False + sid = residue.segment.segid + rid = residue.resid+1 + try: + nxt = residue.universe.residues[residue.ix+1] + except IndexError: + _manual_sel = True else: + if not (nxt.segment.segid == sid and nxt.resid == rid): + _manual_sel = True + + if _manual_sel: + sel = 'segid {} and resid {}'.format(sid, rid) + try: + nxt = residue.universe.select_atoms(sel).residues[0] + except IndexError: + return None + n_ = nxt.atoms[nxt.atoms.names == 'N'] + if not n_: return None + # ncac = residue.atoms.select_atoms('name N', 'name CA', 'name C') + ncac = residue.atoms[np.in1d(residue.atoms.names, ['N', 'CA', 'C'])] + if len(ncac) != 3: + return None + + # sel = c_+ncac + names = ncac.names + sel = ncac[names == 'N']+ncac[names == 'CA']+ncac[names == 'C']+n_ + return sel + transplants[Residue].append(('psi_selection', psi_selection)) def omega_selection(residue): @@ -830,9 +863,11 @@ def moment_of_inertia(group, pbc=False, **kwargs): unwrap = kwargs.pop('unwrap', False) compound = kwargs.pop('compound', 'group') - com = atomgroup.center_of_mass(pbc=pbc, unwrap=unwrap, compound=compound) + com = atomgroup.center_of_mass( + pbc=pbc, unwrap=unwrap, compound=compound) if compound != 'group': - com = (com * group.masses[:, None]).sum(axis=0) / group.masses.sum() + com = (com * group.masses[:, None] + ).sum(axis=0) / group.masses.sum() if pbc: pos = atomgroup.pack_into_box(inplace=False) - com @@ -942,7 +977,8 @@ def shape_parameter(group, pbc=False, **kwargs): recenteredpos[x, :]) tensor /= atomgroup.total_mass() eig_vals = np.linalg.eigvalsh(tensor) - shape = 27.0 * np.prod(eig_vals - np.mean(eig_vals)) / np.power(np.sum(eig_vals), 3) + shape = 27.0 * np.prod(eig_vals - np.mean(eig_vals) + ) / np.power(np.sum(eig_vals), 3) return shape @@ -985,9 +1021,11 @@ def asphericity(group, pbc=False, unwrap=None, compound='group'): atomgroup = group.atoms masses = atomgroup.masses - com = atomgroup.center_of_mass(pbc=pbc, unwrap=unwrap, compound=compound) + com = atomgroup.center_of_mass( + pbc=pbc, unwrap=unwrap, compound=compound) if compound != 'group': - com = (com * group.masses[:, None]).sum(axis=0) / group.masses.sum() + com = (com * group.masses[:, None] + ).sum(axis=0) / group.masses.sum() if pbc: recenteredpos = (atomgroup.pack_into_box(inplace=False) - com) @@ -1200,6 +1238,7 @@ class AltLocs(AtomAttr): def _gen_initial_values(na, nr, ns): return np.array(['' for _ in range(na)], dtype=object) + class GBScreens(AtomAttr): """Generalized Born screening factor""" attrname = 'gbscreens' @@ -1211,6 +1250,7 @@ class GBScreens(AtomAttr): def _gen_initial_values(na, nr, ns): return np.zeros(na) + class SolventRadii(AtomAttr): """Intrinsic solvation radius""" attrname = 'solventradii' @@ -1222,6 +1262,7 @@ class SolventRadii(AtomAttr): def _gen_initial_values(na, nr, ns): return np.zeros(na) + class NonbondedIndices(AtomAttr): """Nonbonded index (AMBER)""" attrname = 'nbindices' @@ -1232,7 +1273,8 @@ class NonbondedIndices(AtomAttr): @staticmethod def _gen_initial_values(na, nr, ns): return np.zeros(na, dtype=np.int32) - + + class RMins(AtomAttr): """The Rmin/2 LJ parameter""" attrname = 'rmins' @@ -1244,6 +1286,7 @@ class RMins(AtomAttr): def _gen_initial_values(na, nr, ns): return np.zeros(na) + class Epsilons(AtomAttr): """The epsilon LJ parameter""" attrname = 'epsilons' @@ -1255,6 +1298,7 @@ class Epsilons(AtomAttr): def _gen_initial_values(na, nr, ns): return np.zeros(na) + class RMin14s(AtomAttr): """The Rmin/2 LJ parameter for 1-4 interactions""" attrname = 'rmin14s' @@ -1266,6 +1310,7 @@ class RMin14s(AtomAttr): def _gen_initial_values(na, nr, ns): return np.zeros(na) + class Epsilon14s(AtomAttr): """The epsilon LJ parameter for 1-4 interactions""" attrname = 'epsilon14s' @@ -1277,6 +1322,7 @@ class Epsilon14s(AtomAttr): def _gen_initial_values(na, nr, ns): return np.zeros(na) + class ResidueAttr(TopologyAttr): attrname = 'residueattrs' singular = 'residueattr' @@ -1414,13 +1460,14 @@ def sequence(self, **kwargs): format = kwargs.pop("format", "SeqRecord") if format not in formats: raise TypeError("Unknown format='{0}': must be one of: {1}".format( - format, ", ".join(formats))) + format, ", ".join(formats))) try: - sequence = "".join([convert_aa_code(r) for r in self.residues.resnames]) + sequence = "".join([convert_aa_code(r) + for r in self.residues.resnames]) except KeyError as err: six.raise_from(ValueError("AtomGroup contains a residue name '{0}' that " - "does not have a IUPAC protein 1-letter " - "character".format(err.message)), None) + "does not have a IUPAC protein 1-letter " + "character".format(err.message)), None) if format == "string": return sequence seq = Bio.Seq.Seq(sequence) @@ -1478,6 +1525,7 @@ class Molnums(ResidueAttr): # segment attributes + class SegmentAttr(TopologyAttr): """Base class for segment attributes. @@ -1535,12 +1583,12 @@ def _check_connection_values(func): """ @functools.wraps(func) def wrapper(self, values, *args, **kwargs): - if not all(len(x) == self._n_atoms - and all(isinstance(y, (int, np.integer)) for y in x) - for x in values): + if not all(len(x) == self._n_atoms + and all(isinstance(y, (int, np.integer)) for y in x) + for x in values): raise ValueError(("{} must be an iterable of tuples with {}" - " atom indices").format(self.attrname, - self._n_atoms)) + " atom indices").format(self.attrname, + self._n_atoms)) clean = [] for v in values: if v[0] > v[-1]: @@ -1550,9 +1598,10 @@ def wrapper(self, values, *args, **kwargs): return func(self, clean, *args, **kwargs) return wrapper + class _Connection(AtomAttr): """Base class for connectivity between atoms - + .. versionchanged:: 0.21.0 Added type checking to atom index values. """ @@ -1638,7 +1687,7 @@ def _add_bonds(self, values, types=None, guessed=True, order=None): del self._cache['bd'] except KeyError: pass - + @_check_connection_values def _delete_bonds(self, values): """ @@ -1668,6 +1717,7 @@ def _delete_bonds(self, values): except KeyError: pass + class Bonds(_Connection): """Bonds between two atoms @@ -1820,6 +1870,7 @@ def n_fragments(self): ('n_fragments', property(n_fragments, None, None, n_fragments.__doc__))) + class UreyBradleys(_Connection): """Angles between two atoms @@ -1834,6 +1885,7 @@ class UreyBradleys(_Connection): transplants = defaultdict(list) _n_atoms = 2 + class Angles(_Connection): """Angles between three atoms @@ -1863,6 +1915,7 @@ class Impropers(_Connection): transplants = defaultdict(list) _n_atoms = 4 + class CMaps(_Connection): """ A connection between five atoms From 56fa4e84bcaae36b414d5c473e45bd56a2000416 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Tue, 2 Jun 2020 00:58:35 +1000 Subject: [PATCH 2/4] added residuegroup dihedrals and refactored residue dihedrals --- package/MDAnalysis/core/topologyattrs.py | 383 ++++++++++++++++-- .../MDAnalysisTests/core/test_atomgroup.py | 208 ++++++++-- .../MDAnalysisTests/core/test_residuegroup.py | 7 + 3 files changed, 537 insertions(+), 61 deletions(-) diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index c6451f515c2..90afc138aa1 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -480,15 +480,28 @@ class Atomnames(AtomAttr): def _gen_initial_values(na, nr, ns): return np.array(['' for _ in range(na)], dtype=object) - def phi_selection(residue): - """AtomGroup corresponding to the phi protein backbone dihedral + def phi_selection(residue, c_name='C', n_name='N', ca_name='CA'): + """Select AtomGroup corresponding to the phi protein backbone dihedral C'-N-CA-C. + Parameters + ---------- + c_name: str (optional) + name for the backbone C atom + n_name: str (optional) + name for the backbone N atom + ca_name: str (optional) + name for the alpha-carbon atom + Returns ------- AtomGroup 4-atom selection in the correct order. If no C' found in the previous residue (by resid) then this method returns ``None``. + + .. versionchanged:: 1.0.0 + Added arguments for flexible atom names and refactored code for + faster atom matching with boolean arrays. """ # fnmatch is expensive. try the obv candidate first prev = residue.universe.residues[residue.ix-1] @@ -500,31 +513,106 @@ def phi_selection(residue): prev = residue.universe.select_atoms(sel).residues[0] except IndexError: return None - c_ = prev.atoms[prev.atoms.names == 'C'] - if not c_: + c_ = prev.atoms[prev.atoms.names == c_name] + if not len(c_) == 1: return None - # ncac = residue.atoms.select_atoms('name N', 'name CA', 'name C') - ncac = residue.atoms[np.in1d(residue.atoms.names, ['N', 'CA', 'C'])] - if len(ncac) != 3: + atnames = residue.atoms.names + ncac_names = [n_name, ca_name, c_name] + ncac = [residue.atoms[atnames == n] for n in ncac_names] + if not all(len(ag) == 1 for ag in ncac): return None - # sel = c_+ncac - names = ncac.names - sel = c_+ncac[names == 'N']+ncac[names == 'CA']+ncac[names == 'C'] + sel = c_+sum(ncac) return sel transplants[Residue].append(('phi_selection', phi_selection)) - def psi_selection(residue): - """AtomGroup corresponding to the psi protein backbone dihedral + def phi_selections(residues, c_name='C', n_name='N', ca_name='CA'): + """Select list of AtomGroups corresponding to the phi protein + backbone dihedral C'-N-CA-C. + + Parameters + ---------- + c_name: str (optional) + name for the backbone C atom + n_name: str (optional) + name for the backbone N atom + ca_name: str (optional) + name for the alpha-carbon atom + + Returns + ------- + list of AtomGroups + 4-atom selections in the correct order. If no C' found in the + previous residue (by resid) then corresponding item in the list + is ``None``. + + .. versionadded:: 1.0.0 + """ + + u = residues[0].universe + prev = u.residues[residues.ix-1] # obv candidates first + rsid = residues.segids + prid = residues.resids-1 + ncac_names = [n_name, ca_name, c_name] + sel = 'segid {} and resid {}' + + # replace wrong residues + wix = np.where((prev.segids != rsid) | (prev.resids != prid))[0] + invalid = [] + if len(wix): + prevls = list(prev) + for s, r, i in zip(rsid[wix], prid[wix], wix): + try: + prevls[i] = u.select_atoms(sel.format(s, r)).residues[0] + except IndexError: + invalid.append(i) + prev = sum(prevls) + + keep_prev = [sum(r.atoms.names == c_name) == 1 for r in prev] + keep_res = [all(sum(r.atoms.names == n) == 1 for n in ncac_names) + for r in residues] + keep = np.array(keep_prev) & np.array(keep_res) + keep[invalid] = False + results = np.zeros_like(residues, dtype=object) + results[~keep] = None + prev = prev[keep] + residues = residues[keep] + keepix = np.where(keep)[0] + + c_ = prev.atoms[prev.atoms.names == c_name] + n = residues.atoms[residues.atoms.names == n_name] + ca = residues.atoms[residues.atoms.names == ca_name] + c = residues.atoms[residues.atoms.names == c_name] + results[keepix] = [sum(atoms) for atoms in zip(c_, n, ca, c)] + return list(results) + + transplants[ResidueGroup].append(('phi_selections', phi_selections)) + + + def psi_selection(residue, c_name='C', n_name='N', ca_name='CA'): + """Select AtomGroup corresponding to the psi protein backbone dihedral N-CA-C-N'. + Parameters + ---------- + c_name: str (optional) + name for the backbone C atom + n_name: str (optional) + name for the backbone N atom + ca_name: str (optional) + name for the alpha-carbon atom + Returns ------- AtomGroup 4-atom selection in the correct order. If no N' found in the following residue (by resid) then this method returns ``None``. + + .. versionchanged:: 1.0.0 + Added arguments for flexible atom names and refactored code for + faster atom matching with boolean arrays. """ # fnmatch is expensive. try the obv candidate first @@ -545,52 +633,232 @@ def psi_selection(residue): nxt = residue.universe.select_atoms(sel).residues[0] except IndexError: return None - n_ = nxt.atoms[nxt.atoms.names == 'N'] - if not n_: + n_ = nxt.atoms[nxt.atoms.names == n_name] + if not len(n_) == 1: return None - # ncac = residue.atoms.select_atoms('name N', 'name CA', 'name C') - ncac = residue.atoms[np.in1d(residue.atoms.names, ['N', 'CA', 'C'])] - if len(ncac) != 3: + atnames = residue.atoms.names + ncac_names = [n_name, ca_name, c_name] + ncac = [residue.atoms[atnames == n] for n in ncac_names] + if not all(len(ag) == 1 for ag in ncac): return None - # sel = c_+ncac - names = ncac.names - sel = ncac[names == 'N']+ncac[names == 'CA']+ncac[names == 'C']+n_ + sel = sum(ncac) + n_ return sel transplants[Residue].append(('psi_selection', psi_selection)) - def omega_selection(residue): - """AtomGroup corresponding to the omega protein backbone dihedral + def _get_next_residues_by_resid(residues): + """Select list of Residues corresponding to the next resid for each + residue in `residues`. + + Returns + ------- + List of Residues + The next residue in Universe. If not found, the corresponding + item in the list is ``None``. + + .. versionadded:: 1.0.0 + """ + u = residues[0].universe + nxres = rview = np.array([None]*len(residues), dtype=object) + # no guarantee residues is ordered or unique + last = max(residues.ix) + if last == len(u.residues)-1: + notlast = residues.ix != last + rview = nxres[notlast] + residues = residues[notlast] + + rview[:] = nxt = u.residues[residues.ix+1] + rsid = residues.segids + nrid = residues.resids+1 + sel = 'segid {} and resid {}' + invalid = [] + + # replace wrong residues + wix = np.where((nxt.segids != rsid) | (nxt.resids != nrid))[0] + if len(wix): + for s, r, i in zip(rsid[wix], nrid[wix], wix): + try: + rview[i] = u.select_atoms(sel.format(s, r)).residues[0] + except IndexError: + rview[i] = None + return nxres + + transplants[ResidueGroup].append(('_get_next_residues_by_resid', + _get_next_residues_by_resid)) + + def psi_selections(residues, c_name='C', n_name='N', ca_name='CA'): + """Select list of AtomGroups corresponding to the psi protein + backbone dihedral N-CA-C-N'. + + Parameters + ---------- + c_name: str (optional) + name for the backbone C atom + n_name: str (optional) + name for the backbone N atom + ca_name: str (optional) + name for the alpha-carbon atom + + Returns + ------- + List of AtomGroups + 4-atom selections in the correct order. If no N' found in the + following residue (by resid) then the corresponding item in the + list is ``None``. + + .. versionadded:: 1.0.0 + """ + results = np.array([None]*len(residues), dtype=object) + nxtres = residues._get_next_residues_by_resid() + rix = np.where(nxtres)[0] + nxt = sum(nxtres[rix]) + residues = residues[rix] + ncac_names = [n_name, ca_name, c_name] + + keep_nxt = [sum(r.atoms.names == n_name) == 1 for r in nxt] + keep_res = [all(sum(r.atoms.names == n) == 1 for n in ncac_names) + for r in residues] + keep = np.array(keep_nxt) & np.array(keep_res) + nxt = nxt[keep] + residues = residues[keep] + keepix = np.where(keep)[0] + + n = residues.atoms[residues.atoms.names == n_name] + ca = residues.atoms[residues.atoms.names == ca_name] + c = residues.atoms[residues.atoms.names == c_name] + n_ = nxt.atoms[nxt.atoms.names == n_name] + results[rix[keepix]] = [sum(atoms) for atoms in zip(n, ca, c, n_)] + return list(results) + + transplants[ResidueGroup].append(('psi_selections', psi_selections)) + + def omega_selection(residue, c_name='C', n_name='N', ca_name='CA'): + """Select AtomGroup corresponding to the omega protein backbone dihedral CA-C-N'-CA'. omega describes the -C-N- peptide bond. Typically, it is trans (180 degrees) although cis-bonds (0 degrees) are also occasionally observed (especially near Proline). + Parameters + ---------- + c_name: str (optional) + name for the backbone C atom + n_name: str (optional) + name for the backbone N atom + ca_name: str (optional) + name for the alpha-carbon atom + Returns ------- AtomGroup 4-atom selection in the correct order. If no C' found in the previous residue (by resid) then this method returns ``None``. + .. versionchanged:: 1.0.0 + Added arguments for flexible atom names and refactored code for + faster atom matching with boolean arrays. """ - nextres = residue.resid + 1 - segid = residue.segment.segid - sel = (residue.atoms.select_atoms('name CA', 'name C') + - residue.universe.select_atoms( - 'segid {} and resid {} and name N'.format(segid, nextres), - 'segid {} and resid {} and name CA'.format(segid, nextres))) - if len(sel) == 4: - return sel + # fnmatch is expensive. try the obv candidate first + _manual_sel = False + sid = residue.segment.segid + rid = residue.resid+1 + try: + nxt = residue.universe.residues[residue.ix+1] + except IndexError: + _manual_sel = True else: + if not (nxt.segment.segid == sid and nxt.resid == rid): + _manual_sel = True + + if _manual_sel: + sel = 'segid {} and resid {}'.format(sid, rid) + try: + nxt = residue.universe.select_atoms(sel).residues[0] + except IndexError: + return None + + ca = residue.atoms[residue.atoms.names == ca_name] + c = residue.atoms[residue.atoms.names == c_name] + n_ = nxt.atoms[nxt.atoms.names == n_name] + ca_ = nxt.atoms[nxt.atoms.names == ca_name] + + if not all(len(ag) == 1 for ag in [ca_, n_, ca, c]): return None + return ca+c+n_+ca_ + transplants[Residue].append(('omega_selection', omega_selection)) - def chi1_selection(residue): - """AtomGroup corresponding to the chi1 sidechain dihedral N-CA-CB-CG. + def omega_selections(residues, c_name='C', n_name='N', ca_name='CA'): + """Select list of AtomGroups corresponding to the omega protein + backbone dihedral CA-C-N'-CA'. + + omega describes the -C-N- peptide bond. Typically, it is trans (180 + degrees) although cis-bonds (0 degrees) are also occasionally observed + (especially near Proline). + + Parameters + ---------- + c_name: str (optional) + name for the backbone C atom + n_name: str (optional) + name for the backbone N atom + ca_name: str (optional) + name for the alpha-carbon atom + + Returns + ------- + List of AtomGroups + 4-atom selections in the correct order. If no C' found in the + previous residue (by resid) then the corresponding item in the + list is ``None``. + + .. versionadded:: 1.0.0 + """ + results = np.array([None]*len(residues), dtype=object) + nxtres = residues._get_next_residues_by_resid() + rix = np.where(nxtres)[0] + nxt = sum(nxtres[rix]) + residues = residues[rix] + + nxtatoms = [ca_name, n_name] + resatoms = [ca_name, c_name] + keep_nxt = [all(sum(r.atoms.names == n) == 1 for n in nxtatoms) + for r in nxt] + keep_res = [all(sum(r.atoms.names == n) == 1 for n in resatoms) + for r in residues] + keep = np.array(keep_nxt) & np.array(keep_res) + nxt = nxt[keep] + residues = residues[keep] + keepix = np.where(keep)[0] + + c = residues.atoms[residues.atoms.names == c_name] + ca = residues.atoms[residues.atoms.names == ca_name] + n_ = nxt.atoms[nxt.atoms.names == n_name] + ca_ = nxt.atoms[nxt.atoms.names == ca_name] + + results[rix[keepix]] = [sum(atoms) for atoms in zip(ca, c, n_, ca_)] + return list(results) + + transplants[ResidueGroup].append(('omega_selections', omega_selections)) + + def chi1_selection(residue, n_name='N', ca_name='CA', cb_name='CB', + cg_name='CG'): + """Select AtomGroup corresponding to the chi1 sidechain dihedral N-CA-CB-CG. + + Parameters + ---------- + c_name: str (optional) + name for the backbone C atom + ca_name: str (optional) + name for the alpha-carbon atom + cb_name: str (optional) + name for the beta-carbon atom + cg_name: str (optional) + name for the gamma-carbon atom Returns ------- @@ -598,17 +866,58 @@ def chi1_selection(residue): 4-atom selection in the correct order. If no CB and/or CG is found then this method returns ``None``. + .. versionchanged:: 1.0.0 + Added arguments for flexible atom names and refactored code for + faster atom matching with boolean arrays. + .. versionadded:: 0.7.5 """ - ag = residue.atoms.select_atoms('name N', 'name CA', - 'name CB', 'name CG') - if len(ag) == 4: - return ag - else: + names = [n_name, ca_name, cb_name, cg_name] + ags = [residue.atoms[residue.atoms.names == n] for n in names] + if any(len(ag)!= 1 for ag in ags): return None + return sum(ags) transplants[Residue].append(('chi1_selection', chi1_selection)) + def chi1_selections(residues, n_name='N', ca_name='CA', cb_name='CB', + cg_name='CG'): + """Select list of AtomGroups corresponding to the chi1 sidechain dihedral + N-CA-CB-CG. + + Parameters + ---------- + c_name: str (optional) + name for the backbone C atom + ca_name: str (optional) + name for the alpha-carbon atom + cb_name: str (optional) + name for the beta-carbon atom + cg_name: str (optional) + name for the gamma-carbon atom + + Returns + ------- + List of AtomGroups + 4-atom selections in the correct order. If no CB and/or CG is found + then the corresponding item in the list is ``None``. + + .. versionadded:: 1.0.0 + """ + results = np.array([None]*len(residues)) + names = [n_name, ca_name, cb_name, cg_name] + keep = [all(sum(r.atoms.names == n) == 1 for n in names) + for r in residues] + keepix = np.where(keep)[0] + residues = residues[keep] + + atnames = residues.atoms.names + ags = [residues.atoms[atnames == n] for n in names] + results[keepix] = [sum(atoms) for atoms in zip(*ags)] + return list(results) + + transplants[ResidueGroup].append(('chi1_selections', chi1_selections)) + # TODO: update docs to property doc class Atomtypes(AtomAttr): diff --git a/testsuite/MDAnalysisTests/core/test_atomgroup.py b/testsuite/MDAnalysisTests/core/test_atomgroup.py index 881a4818621..24f8a0bd524 100644 --- a/testsuite/MDAnalysisTests/core/test_atomgroup.py +++ b/testsuite/MDAnalysisTests/core/test_atomgroup.py @@ -744,51 +744,211 @@ def test_adding_empty_ags(self): class TestDihedralSelections(object): dih_prec = 2 + @staticmethod + @pytest.fixture(scope='module') + def GRO(): + return mda.Universe(GRO) + @staticmethod @pytest.fixture(scope='module') def PSFDCD(): return mda.Universe(PSF, DCD) - def test_phi_selection(self, PSFDCD): - phisel = PSFDCD.segments[0].residues[9].phi_selection() + @staticmethod + @pytest.fixture(scope='class') + def resgroup(GRO): + return GRO.segments[0].residues[8:10] + + def test_phi_selection(self, GRO): + phisel = GRO.segments[0].residues[9].phi_selection() assert_equal(phisel.names, ['C', 'N', 'CA', 'C']) assert_equal(phisel.residues.resids, [9, 10]) assert_equal(phisel.residues.resnames, ['PRO', 'GLY']) - def test_psi_selection(self, PSFDCD): - psisel = PSFDCD.segments[0].residues[9].psi_selection() + @pytest.mark.parametrize('kwargs,names', [ + ({'c_name': 'O'}, ['O', 'N', 'CA', 'O']), + ({'n_name': 'O'}, ['C', 'O', 'CA', 'C']), + ({'ca_name': 'O'}, ['C', 'N', 'O', 'C']) + ]) + def test_phi_selection_name(self, GRO, kwargs, names): + phisel = GRO.segments[0].residues[9].phi_selection(**kwargs) + assert_equal(phisel.names, names) + assert_equal(phisel.residues.resids, [9, 10]) + assert_equal(phisel.residues.resnames, ['PRO', 'GLY']) + + def test_phi_selections_single(self, GRO): + rgsel = GRO.segments[0].residues[[9]].phi_selections() + assert len(rgsel) == 1 + phisel = rgsel[0] + assert_equal(phisel.names, ['C', 'N', 'CA', 'C']) + assert_equal(phisel.residues.resids, [9, 10]) + assert_equal(phisel.residues.resnames, ['PRO', 'GLY']) + + def test_phi_selections(self, resgroup): + rgsel = resgroup.phi_selections() + rssel = [r.phi_selection() for r in resgroup] + assert_equal(rgsel, rssel) + + @pytest.mark.parametrize('kwargs,names', [ + ({'c_name': 'O'}, ['O', 'N', 'CA', 'O']), + ({'n_name': 'O'}, ['C', 'O', 'CA', 'C']), + ({'ca_name': 'O'}, ['C', 'N', 'O', 'C']) + ]) + def test_phi_selections_name(self, resgroup, kwargs, names): + rgsel = resgroup.phi_selections(**kwargs) + for ag in rgsel: + assert_equal(ag.names, names) + + def test_psi_selection(self, GRO): + psisel = GRO.segments[0].residues[9].psi_selection() assert_equal(psisel.names, ['N', 'CA', 'C', 'N']) assert_equal(psisel.residues.resids, [10, 11]) assert_equal(psisel.residues.resnames, ['GLY', 'ALA']) - def test_omega_selection(self, PSFDCD): - osel = PSFDCD.segments[0].residues[7].omega_selection() + @pytest.mark.parametrize('kwargs,names', [ + ({'c_name': 'O'}, ['N', 'CA', 'O', 'N']), + ({'n_name': 'O'}, ['O', 'CA', 'C', 'O']), + ({'ca_name': 'O'}, ['N', 'O', 'C', 'N']), + ]) + def test_psi_selection_name(self, GRO, kwargs, names): + psisel = GRO.segments[0].residues[9].psi_selection(**kwargs) + assert_equal(psisel.names, names) + assert_equal(psisel.residues.resids, [10, 11]) + assert_equal(psisel.residues.resnames, ['GLY', 'ALA']) + + def test_psi_selections_single(self, GRO): + rgsel = GRO.segments[0].residues[[9]].psi_selections() + assert len(rgsel) == 1 + psisel = rgsel[0] + assert_equal(psisel.names, ['N', 'CA', 'C', 'N']) + assert_equal(psisel.residues.resids, [10, 11]) + assert_equal(psisel.residues.resnames, ['GLY', 'ALA']) + + def test_psi_selections(self, resgroup): + rgsel = resgroup.psi_selections() + rssel = [r.psi_selection() for r in resgroup] + assert_equal(rgsel, rssel) + + @pytest.mark.parametrize('kwargs,names', [ + ({'c_name': 'O'}, ['N', 'CA', 'O', 'N']), + ({'n_name': 'O'}, ['O', 'CA', 'C', 'O']), + ({'ca_name': 'O'}, ['N', 'O', 'C', 'N']), + ]) + def test_psi_selections_name(self, resgroup, kwargs, names): + rgsel = resgroup.psi_selections(**kwargs) + for ag in rgsel: + assert_equal(ag.names, names) + + def test_omega_selection(self, GRO): + osel = GRO.segments[0].residues[7].omega_selection() assert_equal(osel.names, ['CA', 'C', 'N', 'CA']) assert_equal(osel.residues.resids, [8, 9]) assert_equal(osel.residues.resnames, ['ALA', 'PRO']) - def test_chi1_selection(self, PSFDCD): - sel = PSFDCD.segments[0].residues[12].chi1_selection() # LYS + @pytest.mark.parametrize('kwargs,names', [ + ({'c_name': 'O'}, ['CA', 'O', 'N', 'CA']), + ({'n_name': 'O'}, ['CA', 'C', 'O', 'CA']), + ({'ca_name': 'O'}, ['O', 'C', 'N', 'O']), + ]) + def test_omega_selection_name(self, GRO, kwargs, names): + osel = GRO.segments[0].residues[7].omega_selection(**kwargs) + assert_equal(osel.names, names) + assert_equal(osel.residues.resids, [8, 9]) + assert_equal(osel.residues.resnames, ['ALA', 'PRO']) + + def test_omega_selections_single(self, GRO): + rgsel = GRO.segments[0].residues[[7]].omega_selections() + assert len(rgsel) == 1 + osel = rgsel[0] + assert_equal(osel.names, ['CA', 'C', 'N', 'CA']) + assert_equal(osel.residues.resids, [8, 9]) + assert_equal(osel.residues.resnames, ['ALA', 'PRO']) + + def test_omega_selections(self, resgroup): + rgsel = resgroup.omega_selections() + rssel = [r.omega_selection() for r in resgroup] + assert_equal(rgsel, rssel) + + @pytest.mark.parametrize('kwargs,names', [ + ({'c_name': 'O'}, ['CA', 'O', 'N', 'CA']), + ({'n_name': 'O'}, ['CA', 'C', 'O', 'CA']), + ({'ca_name': 'O'}, ['O', 'C', 'N', 'O']), + ]) + def test_omega_selections_name(self, resgroup, kwargs, names): + rgsel = resgroup.omega_selections(**kwargs) + for ag in rgsel: + assert_equal(ag.names, names) + + def test_chi1_selection(self, GRO): + sel = GRO.segments[0].residues[12].chi1_selection() # LYS + assert_equal(sel.names, ['N', 'CA', 'CB', 'CG']) + assert_equal(sel.residues.resids, [13]) + assert_equal(sel.residues.resnames, ['LYS']) + + @pytest.mark.parametrize('kwargs,names', [ + ({'n_name': 'O'}, ['O', 'CA', 'CB', 'CG']), + ({'ca_name': 'O'}, ['N', 'O', 'CB', 'CG']), + ({'cb_name': 'O'}, ['N', 'CA', 'O', 'CG']), + ({'cg_name': 'O'}, ['N', 'CA', 'CB', 'O']), + ]) + def test_chi1_selection_name(self, GRO, kwargs, names): + sel = GRO.segments[0].residues[12].chi1_selection(**kwargs) # LYS + assert_equal(sel.names, names) + assert_equal(sel.residues.resids, [13]) + assert_equal(sel.residues.resnames, ['LYS']) + + def test_chi1_selections_single(self, GRO): + rgsel = GRO.segments[0].residues[[12]].chi1_selections() + assert len(rgsel) == 1 + sel = rgsel[0] assert_equal(sel.names, ['N', 'CA', 'CB', 'CG']) assert_equal(sel.residues.resids, [13]) assert_equal(sel.residues.resnames, ['LYS']) - def test_phi_sel_fail(self, PSFDCD): - sel = PSFDCD.residues[0].phi_selection() + def test_chi1_selections(self, resgroup): + rgsel = resgroup.chi1_selections() + rssel = [r.chi1_selection() for r in resgroup] + assert_equal(rgsel, rssel) + + def test_phi_sel_fail(self, GRO): + sel = GRO.residues[0].phi_selection() assert sel is None - def test_psi_sel_fail(self, PSFDCD): - sel = PSFDCD.residues[-1].psi_selection() + def test_phi_sels_fail(self, GRO): + rgsel = GRO.residues[212:216].phi_selections() + assert rgsel[0] is not None + assert rgsel[1] is not None + assert_equal(rgsel[-2:], [None, None]) + + def test_psi_sel_fail(self, GRO): + sel = GRO.residues[-1].psi_selection() assert sel is None - def test_omega_sel_fail(self, PSFDCD): - sel = PSFDCD.residues[-1].omega_selection() + def test_psi_sels_fail(self, GRO): + rgsel = GRO.residues[211:215].psi_selections() + assert rgsel[0] is not None + assert rgsel[1] is not None + assert_equal(rgsel[-2:], [None, None]) + + def test_omega_sel_fail(self, GRO): + sel = GRO.residues[-1].omega_selection() assert sel is None - def test_ch1_sel_fail(self, PSFDCD): - sel = PSFDCD.segments[0].residues[7].chi1_selection() + def test_omega_sels_fail(self, GRO): + rgsel = GRO.residues[211:215].omega_selections() + assert rgsel[0] is not None + assert rgsel[1] is not None + assert_equal(rgsel[-2:], [None, None]) + + def test_ch1_sel_fail(self, GRO): + sel = GRO.segments[0].residues[7].chi1_selection() assert sel is None # ALA + def test_chi1_sels_fail(self, GRO): + rgsel = GRO.residues[12:14].chi1_selections() + assert rgsel[0] is not None + assert rgsel[1] is None + def test_dihedral_phi(self, PSFDCD): phisel = PSFDCD.segments[0].residues[9].phi_selection() assert_almost_equal(phisel.dihedral.value(), -168.57384, self.dih_prec) @@ -805,21 +965,21 @@ def test_dihedral_chi1(self, PSFDCD): sel = PSFDCD.segments[0].residues[12].chi1_selection() # LYS assert_almost_equal(sel.dihedral.value(), -58.428127, self.dih_prec) - def test_phi_nodep(self, PSFDCD): + def test_phi_nodep(self, GRO): with no_deprecated_call(): - phisel = PSFDCD.segments[0].residues[9].phi_selection() + phisel = GRO.segments[0].residues[9].phi_selection() - def test_psi_nodep(self, PSFDCD): + def test_psi_nodep(self, GRO): with no_deprecated_call(): - psisel = PSFDCD.segments[0].residues[9].psi_selection() + psisel = GRO.segments[0].residues[9].psi_selection() - def test_omega_nodep(self, PSFDCD): + def test_omega_nodep(self, GRO): with no_deprecated_call(): - osel = PSFDCD.segments[0].residues[7].omega_selection() + osel = GRO.segments[0].residues[7].omega_selection() - def test_chi1_nodep(self, PSFDCD): + def test_chi1_nodep(self, GRO): with no_deprecated_call(): - sel = PSFDCD.segments[0].residues[12].chi1_selection() # LYS + sel = GRO.segments[0].residues[12].chi1_selection() # LYS class TestUnwrapFlag(object): diff --git a/testsuite/MDAnalysisTests/core/test_residuegroup.py b/testsuite/MDAnalysisTests/core/test_residuegroup.py index 4adcb3a1250..81fbd5cf680 100644 --- a/testsuite/MDAnalysisTests/core/test_residuegroup.py +++ b/testsuite/MDAnalysisTests/core/test_residuegroup.py @@ -265,3 +265,10 @@ def test_set_masses(self, universe): def test_atom_order(self, universe): assert_equal(universe.residues.atoms.indices, sorted(universe.residues.atoms.indices)) + + def test_get_next_residue(self, rg): + unsorted_rep_res = rg[[0, 1, 8, 3, 4, 0, 3, 1]] + next_res = sum(unsorted_rep_res._get_next_residues_by_resid()) + resids = unsorted_rep_res.resids+1 + assert_equal(len(next_res), len(unsorted_rep_res)) + assert_equal(next_res.resids, resids) From fb29974183ffd91360c3e6f123012d51f622c574 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Tue, 2 Jun 2020 01:57:05 +1000 Subject: [PATCH 3/4] updated Rama and added function to get previous residues --- package/MDAnalysis/analysis/dihedrals.py | 84 +++++++------------ package/MDAnalysis/core/topologyattrs.py | 51 +++++++++-- .../analysis/test_dihedrals.py | 13 ++- .../MDAnalysisTests/core/test_residuegroup.py | 19 ++++- 4 files changed, 98 insertions(+), 69 deletions(-) diff --git a/package/MDAnalysis/analysis/dihedrals.py b/package/MDAnalysis/analysis/dihedrals.py index 8966ba30278..cfb49b042dd 100644 --- a/package/MDAnalysis/analysis/dihedrals.py +++ b/package/MDAnalysis/analysis/dihedrals.py @@ -265,12 +265,21 @@ class Ramachandran(AnalysisBase): """ def __init__(self, atomgroup, c_name='C', n_name='N', ca_name='CA', - **kwargs): + check_protein=True, **kwargs): """Parameters ---------- atomgroup : AtomGroup or ResidueGroup atoms for residues for which :math:`\phi` and :math:`\psi` are calculated + c_name: str (optional) + name for the backbone C atom + n_name: str (optional) + name for the backbone N atom + ca_name: str (optional) + name for the alpha-carbon atom + check_protein: bool (optional) + whether to raise an error if the provided atomgroup is not a + subset of protein atoms Raises ------ @@ -282,61 +291,30 @@ def __init__(self, atomgroup, c_name='C', n_name='N', ca_name='CA', atomgroup.universe.trajectory, **kwargs) self.atomgroup = atomgroup residues = self.atomgroup.residues - protein = self.atomgroup.universe.select_atoms("protein").residues - if not residues.issubset(protein): - raise ValueError("Found atoms outside of protein. Only atoms " - "inside of a 'protein' selection can be used to " - "calculate dihedrals.") - elif not residues.isdisjoint(protein[[0, -1]]): - warnings.warn("Cannot determine phi and psi angles for the first " - "or last residues") - residues = residues.difference(protein[[0, -1]]) - - # find previous/next residues - u = residues[0].universe - nxt = u.residues[residues.ix[:-1]+1] # residues is ordered - if residues[-1].ix != len(u.residues): - nxt += u.residues[residues[-1].ix+1] - else: - residues = residues[:-1] - prev = u.residues[residues.ix-1] - psid = residues.segids - prid = residues.resids-1 - nrid = residues.resids+1 - sel = 'segid {} and resid {}' - - # delete wrong ones - pix = np.where((prev.segids != psid) | (prev.resids != prid))[0] - nix = np.where((nxt.segids != psid) | (nxt.resids != nrid))[0] - delete = [] # probably better way to do this - if len(pix): - prevls = list(prev) - for s, r, p in zip(psid[pix], prid[pix], pix): - try: - prevls[p] = u.select_atoms(sel.format(s, r)).residues[0] - except IndexError: - delete.append(p) - prev = sum(prevls) - - if len(nix): - nxtls = list(nxt) - for s, r, n in zip(psid[nix], nrid[nix], nix): - try: - nxtls[n] = u.select_atoms(sel.format(s, r)).residues[0] - except IndexError: - delete.append(n) - nxt = sum(nxtls) - - if len(delete): + if check_protein: + protein = self.atomgroup.universe.select_atoms("protein").residues + + if not residues.issubset(protein): + raise ValueError("Found atoms outside of protein. Only atoms " + "inside of a 'protein' selection can be used to " + "calculate dihedrals.") + elif not residues.isdisjoint(protein[[0, -1]]): + warnings.warn("Cannot determine phi and psi angles for the first " + "or last residues") + residues = residues.difference(protein[[0, -1]]) + + prev = residues._get_prev_residues_by_resid() + nxt = residues._get_next_residues_by_resid() + keep = np.array([r is not None for r in prev]) + keep = keep & np.array([r is not None for r in nxt]) + + if not np.all(keep): warnings.warn("Some residues in selection do not have " "phi or psi selections") - keep = np.ones_like(residues, dtype=bool) - keep[delete] = False - prev = prev[keep] - nxt = nxt[keep] - residues = residues[keep] - + prev = sum(prev[keep]) + nxt = sum(nxt[keep]) + residues = residues[keep] # find n, c, ca keep_prev = [sum(r.atoms.names==c_name)==1 for r in prev] diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 90afc138aa1..7e66fd82f36 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -655,39 +655,72 @@ def _get_next_residues_by_resid(residues): Returns ------- List of Residues - The next residue in Universe. If not found, the corresponding - item in the list is ``None``. + List of the next residues in the Universe, by resid and segid. + If not found, the corresponding item in the list is ``None``. .. versionadded:: 1.0.0 """ u = residues[0].universe - nxres = rview = np.array([None]*len(residues), dtype=object) + nxres = np.array([None]*len(residues), dtype=object) + ix = np.arange(len(residues)) # no guarantee residues is ordered or unique last = max(residues.ix) if last == len(u.residues)-1: notlast = residues.ix != last - rview = nxres[notlast] + ix = ix[notlast] residues = residues[notlast] - rview[:] = nxt = u.residues[residues.ix+1] + nxres[ix] = nxt = u.residues[residues.ix+1] rsid = residues.segids nrid = residues.resids+1 sel = 'segid {} and resid {}' - invalid = [] # replace wrong residues wix = np.where((nxt.segids != rsid) | (nxt.resids != nrid))[0] if len(wix): for s, r, i in zip(rsid[wix], nrid[wix], wix): try: - rview[i] = u.select_atoms(sel.format(s, r)).residues[0] + nxres[ix[i]] = u.select_atoms(sel.format(s, r)).residues[0] except IndexError: - rview[i] = None + nxres[ix[i]] = None return nxres - + transplants[ResidueGroup].append(('_get_next_residues_by_resid', _get_next_residues_by_resid)) + def _get_prev_residues_by_resid(residues): + """Select list of Residues corresponding to the previous resid for each + residue in `residues`. + + Returns + ------- + List of Residues + List of the previous residues in the Universe, by resid and segid. + If not found, the corresponding item in the list is ``None``. + + .. versionadded:: 1.0.0 + """ + u = residues[0].universe + pvres = np.array([None]*len(residues)) + pvres[:] = prev = u.residues[residues.ix-1] + rsid = residues.segids + prid = residues.resids-1 + sel = 'segid {} and resid {}' + + # replace wrong residues + wix = np.where((prev.segids != rsid) | (prev.resids != prid))[0] + if len(wix): + for s, r, i in zip(rsid[wix], prid[wix], wix): + try: + pvres[i] = u.select_atoms(sel.format(s, r)).residues[0] + except IndexError: + pvres[i] = None + return pvres + + + transplants[ResidueGroup].append(('_get_prev_residues_by_resid', + _get_prev_residues_by_resid)) + def psi_selections(residues, c_name='C', n_name='N', ca_name='CA'): """Select list of AtomGroups corresponding to the psi protein backbone dihedral N-CA-C-N'. diff --git a/testsuite/MDAnalysisTests/analysis/test_dihedrals.py b/testsuite/MDAnalysisTests/analysis/test_dihedrals.py index 060082f808e..1719d81c935 100644 --- a/testsuite/MDAnalysisTests/analysis/test_dihedrals.py +++ b/testsuite/MDAnalysisTests/analysis/test_dihedrals.py @@ -106,11 +106,18 @@ def test_ramachandran_residue_selections(self, universe): def test_outside_protein_length(self, universe): with pytest.raises(ValueError): - rama = Ramachandran(universe.select_atoms("resid 220")).run() + rama = Ramachandran(universe.select_atoms("resid 220"), + check_protein=True).run() + + def test_outside_protein_unchecked(self, universe): + rama = Ramachandran(universe.select_atoms("resid 220"), + check_protein=False).run() def test_protein_ends(self, universe): - with pytest.warns(UserWarning): - rama = Ramachandran(universe.select_atoms("protein")).run() + with pytest.warns(UserWarning) as record: + rama = Ramachandran(universe.select_atoms("protein"), + check_protein=True).run() + assert len(record) == 1 def test_None_removal(self): with pytest.warns(UserWarning): diff --git a/testsuite/MDAnalysisTests/core/test_residuegroup.py b/testsuite/MDAnalysisTests/core/test_residuegroup.py index 81fbd5cf680..607fac8f1f0 100644 --- a/testsuite/MDAnalysisTests/core/test_residuegroup.py +++ b/testsuite/MDAnalysisTests/core/test_residuegroup.py @@ -267,8 +267,19 @@ def test_atom_order(self, universe): sorted(universe.residues.atoms.indices)) def test_get_next_residue(self, rg): - unsorted_rep_res = rg[[0, 1, 8, 3, 4, 0, 3, 1]] - next_res = sum(unsorted_rep_res._get_next_residues_by_resid()) - resids = unsorted_rep_res.resids+1 + unsorted_rep_res = rg[[0, 1, 8, 3, 4, 0, 3, 1, -1]] + next_res = unsorted_rep_res._get_next_residues_by_resid() + resids = list(unsorted_rep_res.resids+1) + resids[-1] = None + next_resids = [r.resid if r is not None else None for r in next_res] assert_equal(len(next_res), len(unsorted_rep_res)) - assert_equal(next_res.resids, resids) + assert_equal(next_resids, resids) + + def test_get_prev_residue(self, rg): + unsorted_rep_res = rg[[0, 1, 8, 3, 4, 0, 3, 1, -1]] + prev_res = unsorted_rep_res._get_prev_residues_by_resid() + resids = list(unsorted_rep_res.resids-1) + resids[0] = resids[5] = None + prev_resids = [r.resid if r is not None else None for r in prev_res] + assert_equal(len(prev_res), len(unsorted_rep_res)) + assert_equal(prev_resids, resids) From c1827e780a60de797bd63b9e281a0b0964abd7f7 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Tue, 2 Jun 2020 02:31:11 +1000 Subject: [PATCH 4/4] moved docstring to class, added example, updated CHANGELOG --- package/CHANGELOG | 2 + package/MDAnalysis/analysis/dihedrals.py | 67 ++++++++++++++++-------- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index d33c2b4c92d..de51fc6c777 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -135,6 +135,8 @@ Enhancements the capability to allow intermittent behaviour (PR #2256) Changes + * Refactored dihedral selections and Ramachandran.__init__ to speed up + dihedral selections for faster tests (Issue #2671, PR #2706) * Removes the deprecated `t0`, `tf`, and `dtmax` from :class:Waterdynamics.SurvivalProbability. Instead the `start`, `stop` and `tau_max` keywords should be passed to diff --git a/package/MDAnalysis/analysis/dihedrals.py b/package/MDAnalysis/analysis/dihedrals.py index cfb49b042dd..cb19645e65e 100644 --- a/package/MDAnalysis/analysis/dihedrals.py +++ b/package/MDAnalysis/analysis/dihedrals.py @@ -254,39 +254,60 @@ class Ramachandran(AnalysisBase): :class:`~MDAnalysis.ResidueGroup` is generated from `atomgroup` which is compared to the protein to determine if it is a legitimate selection. + Parameters + ---------- + atomgroup : AtomGroup or ResidueGroup + atoms for residues for which :math:`\phi` and :math:`\psi` are + calculated + c_name: str (optional) + name for the backbone C atom + n_name: str (optional) + name for the backbone N atom + ca_name: str (optional) + name for the alpha-carbon atom + check_protein: bool (optional) + whether to raise an error if the provided atomgroup is not a + subset of protein atoms + + Example + ------- + For standard proteins, the default arguments will suffice to run a + Ramachandran analysis:: + + r = Ramachandran(u.select_atoms('protein')).run() + + For proteins with non-standard residues, or for calculating dihedral + angles for other linear polymers, you can switch off the protein checking + and provide your own atom names in place of the typical peptide backbone + atoms:: + + r = Ramachandran(u.atoms, c_name='CX', n_name='NT', ca_name='S', + check_protein=False).run() + + The above analysis will calculate angles from a "phi" selection of + CX'-NT-S-CX and "psi" selections of NT-S-CX-NT'. + + Raises + ------ + ValueError + If the selection of residues is not contained within the protein + and ``check_protein`` is ``True`` + Note ---- - If the residue selection is beyond the scope of the protein, then an error - will be raised. If the residue selection includes the first or last residue, + If ``check_protein`` is ``True`` and the residue selection is beyond + the scope of the protein and, then an error will be raised. + If the residue selection includes the first or last residue, then a warning will be raised and they will be removed from the list of residues, but the analysis will still run. If a :math:`\phi` or :math:`\psi` selection cannot be made, that residue will be removed from the analysis. + .. versionchanged:: 1.0.0 + added c_name, n_name, ca_name, and check_protein keyword arguments """ def __init__(self, atomgroup, c_name='C', n_name='N', ca_name='CA', check_protein=True, **kwargs): - """Parameters - ---------- - atomgroup : AtomGroup or ResidueGroup - atoms for residues for which :math:`\phi` and :math:`\psi` are - calculated - c_name: str (optional) - name for the backbone C atom - n_name: str (optional) - name for the backbone N atom - ca_name: str (optional) - name for the alpha-carbon atom - check_protein: bool (optional) - whether to raise an error if the provided atomgroup is not a - subset of protein atoms - - Raises - ------ - ValueError - If the selection of residues is not contained within the protein - - """ super(Ramachandran, self).__init__( atomgroup.universe.trajectory, **kwargs) self.atomgroup = atomgroup