Merge branch 'master' of github.com:prody/ProDy into remove_numpy_alias_types
jamesmkrieger · Feb 14, 2024 · 84a59d5 · 84a59d5
2 parents cba8c56 + 5f3528f
commit 84a59d5
Show file tree

Hide file tree

Showing 38 changed files with 16,213 additions and 305 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -31,7 +31,7 @@ jobs:
         source activate test
         conda install --yes numpy scipy nose pyparsing requests
         if [[ ${{ matrix.python-version }} == "2.7" ]]; then conda install --yes unittest2; else conda install --yes pdbfixer; fi
-        pip install mmtf-python
+        pip install mmtf-python scikit-learn
         pip install .
         python setup.py build_ext --inplace --force
     - name: Test with pytest

diff --git a/.gitignore b/.gitignore
@@ -24,6 +24,10 @@ __pycache__
 *.pdb.gz
 *.cif
 *.cif.gz
+*.mmtf
+*.map
+*.mrc
+*.dcd
 
 # Docs
 /docs/_build/

diff --git a/prody/apps/prody_apps/prody_select.py b/prody/apps/prody_apps/prody_select.py
@@ -36,9 +36,10 @@ def prody_select(selstr, *pdbs, **kwargs):
     suffix = kwargs.get('suffix', '_selected')
     output = kwargs.get('output', None)
     altloc = kwargs.get('altloc', 'all')
+    uniteChains = kwargs.get('uniteChains', False)
 
     for pdb in pdbs:
-        pdb = parsePDB(pdb, altloc=altloc)
+        pdb = parsePDB(pdb, altloc=altloc, unite_chains=uniteChains)
 
         pdbselect = pdb.select(selstr)
         if pdbselect is None:
@@ -89,6 +90,10 @@ def addCommand(commands):
         type=str, default='_selected',
         help=('output filename suffix (default: %(default)s)'))
 
+    group.add_argument('-u', '--unite-chains', dest='uniteChains',
+        action='store_true',
+        default=False, help=('unite chains if using mmCIF (default False)'))
+
     subparser.add_argument('select', help='atom selection string')
     subparser.add_argument('pdb', nargs='+',
         help='PDB identifier(s) or filename(s)')

diff --git a/prody/atomic/atom.py b/prody/atomic/atom.py
@@ -275,7 +275,7 @@ def toTEMPyAtom(self):
             self.getAltloc(), self.getIcode(),
             self.getCharge(), self.getElement(),
             self.getOccupancy(), self.getResname(),
-            None, self.getACSIndex(), self.getChid(),
+            self.getACSIndex(), self.getChid(),
             self.getResnum())
 
 

diff --git a/prody/atomic/atomic.py b/prody/atomic/atomic.py
@@ -112,6 +112,8 @@ def __getattribute__(self, name):
                                                  ' from ' + str(self))
                         else:
                             return None
+                elif name == '_anisous':
+                    return None
                 else:
                     selstr = name
                     items = name.split('_')
@@ -255,7 +257,7 @@ def getSequence(self, **kwargs):
         return seq
 
     def toTEMPyAtoms(self):
-        """Returns a BioPy.PDB Atom or Structure object as appropriate"""
+        """Returns a :class:`TEMPy.protein.prot_rep_biopy.Atom` or list of them as appropriate"""
         try:
             from TEMPy.protein.prot_rep_biopy import Atom as TEMPyAtom
         except ImportError:
@@ -267,10 +269,152 @@ def toTEMPyAtoms(self):
             return [self.toTEMPyAtom()]
 
     def toTEMPyStructure(self):
-        """Returns a BioPy.PDB Atom or Structure object as appropriate""" 
+        """Returns a :class:`TEMPy.protein.prot_rep_biopy.BioPy_Structure`
+        built from the atoms returned by :meth:`toTEMPyAtoms`.
+
+        :raises ImportError: if TEMPy cannot be imported"""
         try:
             from TEMPy.protein.prot_rep_biopy import BioPy_Structure
         except ImportError:
             raise ImportError('TEMPy is needed for this functionality')
 
         return BioPy_Structure(self.toTEMPyAtoms())
+
+    def numResidues(self):
+        """Returns number of residues, counted as the number of distinct
+        residue indices among the atoms."""
+
+        unique_resindices = set(self._getResindices())
+        return len(unique_resindices)
+
+
+    def toBioPythonStructure(self, header=None, **kwargs):
+        """Returns a :class:`Bio.PDB.Structure.Structure` object built from
+        the atoms of this instance, with one model per coordinate set.
+
+        :arg header: header information handed to the structure builder's
+            ``set_header``, default is **None** (no header is set)
+
+        :arg csets: coordinate set indices, default is all coordinate sets
+
+        The active coordinate set index is changed while the models are
+        built and is restored afterwards, also when an error is raised.
+
+        :raises ImportError: when Biopython cannot be imported
+        """
+        try:
+            from Bio.PDB.StructureBuilder import StructureBuilder
+            from Bio.PDB.PDBExceptions import PDBConstructionException
+        except ImportError:
+            raise ImportError('Bio StructureBuilder could not be imported. '
+                'Reinstall ProDy or install Biopython '
+                'to solve the problem.')
+
+        origACSI = self.getACSIndex()
+
+        csets = kwargs.get('csets', None)
+        if csets is None:
+            csets = range(self.numCoordsets())
+
+        structure_builder = StructureBuilder()
+        structure_builder.init_structure(self.getTitle())
+        if header is not None:
+            structure_builder.set_header(header)
+
+        result = structure_builder.get_structure()
+        # atoms are initialised PQR-style only when both charges and radii
+        # are available
+        result.is_pqr = (self.getCharges() is not None
+                         and self.getRadii() is not None)
+
+        try:
+            for i in csets:
+                self.setACSIndex(i)
+                structure_builder.init_model(i)
+
+                current_segid = None
+                current_chain_id = None
+                current_residue_id = None
+                current_resname = None
+
+                for global_line_counter, atom in enumerate(self):
+                    segid = atom.getSegname()
+                    if current_segid != segid:
+                        current_segid = segid
+                        structure_builder.init_seg(current_segid)
+
+                    chainid = atom.getChid()
+                    resname = atom.getResname()
+
+                    if atom.getFlag('hetatm'):
+                        hetero_flag = 'W' if atom.getFlag('water') else 'H'
+                    else:
+                        hetero_flag = ' '
+
+                    resseq = atom.getResnum()
+                    icode = atom.getIcode()
+                    if not icode:
+                        icode = ' '
+                    residue_id = (hetero_flag, resseq, icode)
+
+                    new_chain = current_chain_id != chainid
+                    if new_chain:
+                        current_chain_id = chainid
+                        structure_builder.init_chain(current_chain_id)
+
+                    # a new chain always starts a new residue; otherwise a
+                    # change of residue id or residue name does
+                    if (new_chain or current_residue_id != residue_id
+                            or current_resname != resname):
+                        current_residue_id = residue_id
+                        current_resname = resname
+                        try:
+                            structure_builder.init_residue(
+                                resname, hetero_flag, resseq, icode
+                            )
+                        except PDBConstructionException as message:
+                            # NOTE(review): in Biopython this helper appears
+                            # to belong to PDBParser rather than Structure;
+                            # confirm that *result* provides it
+                            result._handle_PDB_exception(
+                                message, global_line_counter)
+
+                    name = atom.getName()
+                    coord = atom.getCoords()
+                    altloc = atom.getAltloc()
+                    # the same value is passed as both name and fullname
+                    fullname = name
+                    serial_number = atom.getSerial()
+                    element = atom.getElement()
+
+                    try:
+                        if result.is_pqr:
+                            charge = atom.getCharge()
+                            radius = atom.getRadius()
+                            structure_builder.init_atom(
+                                name,
+                                coord,
+                                charge,
+                                radius,
+                                altloc,
+                                fullname,
+                                serial_number,
+                                element,
+                                charge,
+                                radius,
+                                result.is_pqr,
+                            )
+                        else:
+                            # init atom with pdb fields
+                            structure_builder.init_atom(
+                                name,
+                                coord,
+                                atom.getBeta(),
+                                atom.getOccupancy(),
+                                altloc,
+                                fullname,
+                                serial_number,
+                                element,
+                            )
+                    except PDBConstructionException as message:
+                        # NOTE(review): see the note on
+                        # _handle_PDB_exception at init_residue
+                        result._handle_PDB_exception(
+                            message, global_line_counter)
+
+                    anisou = atom.getAnisou()
+                    if anisou is not None:
+                        structure_builder.set_anisou(anisou)
+
+                    anistd = atom.getAnistd()
+                    if anistd is not None:
+                        structure_builder.set_siguij(anistd)
+        finally:
+            # restore the caller's active coordinate set even on failure
+            self.setACSIndex(origACSI)
+
+        return result
diff --git a/prody/atomic/pointer.py b/prody/atomic/pointer.py
@@ -273,10 +273,35 @@ def _getSubset(self, label):
                             .intersection(set(self._getIndices()))), int)
         subset.sort()
         return subset
+
+    def getAnisous(self):
+        """Returns anisotropic temperature factors of the atoms in the
+        pointer for the active coordinate set, or **None** when the parent
+        atom group has none."""
+
+        anisous = self._ag._anisous
+        if anisous is None:
+            return None
+        # Indexing with self._indices is not slicing, so no view is returned
+        return anisous[self.getACSIndex(), self._indices]
+
+    _getAnisous = getAnisous
+
+    def getBonds(self):
+        """Returns bonds whose two atoms are both within the pointer, or
+        **None** when bonds are not set.  Use :meth:`setBonds` or
+        :meth:`inferBonds` from parent AtomGroup for setting bonds."""
+
+        all_bonds = self._ag._bonds
+        if all_bonds is None:
+            return None
+
+        inside = set(self._getIndices())
+        acsi = self._acsi
+        return array([Bond(self, pair, acsi) for pair in all_bonds
+                      if pair[0] in inside and pair[1] in inside])
+
+    def numBonds(self):
+        """Returns number of bonds between atoms within the pointer, or 0
+        when bonds are not set.  Use :meth:`setBonds` or :meth:`inferBonds`
+        from parent AtomGroup for setting bonds."""
+
+        bonds = self.getBonds()
+        # getBonds returns None when the parent atom group has no bonds
+        return 0 if bonds is None else len(bonds)
 
     def _iterBonds(self):
         """Yield pairs of indices for bonded atoms that are within the pointer.
-        Use :meth:`setBonds` for setting bonds."""
+        Use :meth:`setBonds` from parent AtomGroup for setting bonds."""
 
         if self._ag._bonds is None:
             LOGGER.warning('bonds are not set, use `setBonds` or `inferBonds`')

diff --git a/prody/database/pfam.py b/prody/database/pfam.py
@@ -406,8 +406,10 @@ def parsePfamPDBs(query, data=[], **kwargs):
                 continue
 
             right_dbref = header[data_dict['chain']].dbrefs[right_part]
-            chainStart = ag.select('chain {0}'.format(data_dict['chain'])
-                                  ).getResnums()[0]
+            chain = ag.select('chain {0}'.format(data_dict['chain']))
+            if chain is None:
+                continue
+            chainStart = chain.getResnums()[0]
             missing = chainStart - right_dbref.first[0]
             partStart = ag.getResindices()[np.where(ag.getResnums() == 
                                            right_dbref.first[0] + missing)][0]

diff --git a/prody/dynamics/__init__.py b/prody/dynamics/__init__.py
@@ -360,3 +360,6 @@
 from prody.ensemble import functions
 functions.ClustENM = ClustENM
 
+from . import lda
+from .lda import *
+__all__.extend(lda.__all__)
diff --git a/prody/dynamics/analysis.py b/prody/dynamics/analysis.py
@@ -19,7 +19,8 @@
 from .gnm import GNMBase
 
 __all__ = ['calcCollectivity', 'calcCovariance', 'calcCrossCorr',
-           'calcFractVariance', 'calcSqFlucts', 'calcRMSFlucts', 'calcTempFactors',
+           'calcFractVariance', 'calcSqFlucts', 'calcRMSFlucts',
+           'calcMostMobileNodes', 'calcTempFactors',
            'calcProjection', 'calcCrossProjection',
            'calcSpecDimension', 'calcPairDeformationDist',
            'calcDistFlucts', 'calcHinges', 'calcHitTime',
@@ -343,6 +344,28 @@ def calcRMSFlucts(modes):
 
     return sq_flucts ** 0.5
 
+def calcMostMobileNodes(modes, **kwargs):
+    """Returns indices for nodes with highest root mean square fluctuations
+    (RMSFs) for given set of normal *modes* above a particular *percentile*
+    and/or *cutoff*.  When both are given, a node must exceed both thresholds.
+
+    :arg percentile: percentile of the RMSF values used as a threshold
+        (between 0 and 100).  Default 0 takes all values
+    :type percentile: int
+
+    :arg cutoff: user-defined RMSF threshold, default is to take all values
+    :type cutoff: float
+    """
+    rmsf = calcRMSFlucts(modes)
+
+    # Thresholds are strict (>), so each one is applied only when requested;
+    # comparing against the minimum by default would drop the least mobile
+    # nodes instead of taking all values.
+    keep = np.ones(rmsf.shape, dtype=bool)
+
+    cutoff = kwargs.get('cutoff', None)
+    if cutoff is not None:
+        keep &= rmsf > cutoff
+
+    percentile = kwargs.get('percentile', 0)
+    if percentile:
+        keep &= rmsf > np.percentile(rmsf, percentile)
+
+    return np.nonzero(keep)[0]
+
 def calcCrossCorr(modes, n_cpu=1, norm=True):
     """Returns cross-correlations matrix.  For a 3-d model, cross-correlations
     matrix is an NxN matrix, where N is the number of atoms.  Each element of

diff --git a/prody/dynamics/editing.py b/prody/dynamics/editing.py
@@ -180,7 +180,7 @@ def trimModelByMask(model, mask):
 
     if not isListLike(mask):
         raise TypeError('mask must be either a list or a numpy.ndarray, not {0}'
-                        .format(type(model)))
+                        .format(type(mask)))
 
     is_bool = mask.dtype is np.dtype('bool')
 
@@ -328,7 +328,7 @@ def sliceModelByMask(model, mask, norm=False):
 
     if not isListLike(mask):
         raise TypeError('mask must be either a list or a numpy.ndarray, not {0}'
-                        .format(type(model)))
+                        .format(type(mask)))
 
     is_bool = mask.dtype is np.dtype('bool')
 
@@ -425,7 +425,7 @@ def reduceModelByMask(model, mask):
 
     if not isListLike(mask):
         raise TypeError('mask must be either a list or a numpy.ndarray, not {0}'
-                        .format(type(model)))
+                        .format(type(mask)))
 
     is_bool = mask.dtype is np.dtype('bool')
 

diff --git a/prody/dynamics/functions.py b/prody/dynamics/functions.py
@@ -24,6 +24,7 @@
 from .exanm import exANM, MaskedExANM
 from .rtb import RTB
 from .pca import PCA, EDA
+from .lda import LDA
 from .imanm import imANM
 from .exanm import exANM
 from .mode import Vector, Mode, VectorBase
@@ -82,6 +83,10 @@ def saveModel(nma, filename=None, matrices=False, **kwargs):
         type_ = 'EDA'
     elif isinstance(nma, PCA):
         type_ = 'PCA'
+    elif isinstance(nma, LDA):
+        type_ = 'LDA'
+        attr_list.append('_labels')
+        attr_list.append('_shuffled_ldas')
     else:
         type_ = 'NMA'
 
@@ -177,6 +182,8 @@ def loadModel(filename, **kwargs):
             nma = NMA(title)
         elif type_ == 'RTB':
             nma = RTB(title)
+        elif type_ == 'LDA':
+            nma = LDA(title)
         else:
             raise IOError('NMA model type is not recognized: {0}'.format(type_))
-Original file line number
+Diff line change
@@ Expand Up / @@ -24,6 +24,10 @@ __pycache__ @@
     *.pdb.gz
     *.cif
     *.cif.gz
+    *.mmtf
+    *.map
+    *.mrc
+    *.dcd
     # Docs
     /docs/_build/
@@ Expand Down @@