add ability to parse nmr shielding tensors

ekwan · Oct 5, 2020 · 89f38d4 · 89f38d4
1 parent cd7c238
commit 89f38d4
Show file tree

Hide file tree

Showing 6 changed files with 139 additions and 16 deletions.
diff --git a/cctk/molecule.py b/cctk/molecule.py
@@ -1666,3 +1666,12 @@ def csearch(self, nprocs=1, constraints=[], logfile=None, noncovalent=False):
         import cctk.optimize as opt
         assert isinstance(nprocs, int), "nprocs must be int!"
         return opt.csearch(self, nprocs=nprocs, constraints=constraints, noncovalent=noncovalent, logfile=logfile)
+
+    def num_neighbors_by_atom(self):
+        """
+        Returns a list of the number of neighbors of each atom.
+        """
+        result = []
+        for i in range(self.num_atoms()):
+            result.append(len(self.get_adjacent_atoms(i)))
+        return result
diff --git a/cctk/parse_gaussian.py b/cctk/parse_gaussian.py
@@ -11,34 +11,52 @@
 Functions to help with parsing Gaussian files
 """
 
-def read_nmr_shifts(lines, num_atoms):
+def read_nmr_shifts(blocks, num_atoms):
     """
     Helper method to search through output file and read NMR shifts.
     Args:
         lines (list): list of lines in file
         num_atoms (int): number of atoms expected
     Returns:
         list of isotropic NMR shifts (np.ndarray)
+        list of shielding tensors (list of 3x3 np.ndarray)
     """
     # assumes that lines only come from one Link1 section
     shieldings = []
-    for line in lines:
-        fields = line.split()
-        if len(fields) == 8 and fields[2] == "Isotropic" and fields[5] == "Anisotropy":
+    tensors = []
+    for block in blocks:
+        lines = block.split("\n")
+        tensor = np.zeros(shape=(3,3))
+        for line in lines:
             fields = line.split()
-            assert len(fields) == 8, f"Expected 8 fields on an NMR shielding output line but found {len(fields)} instead!"
-            try:
-                shielding = float(fields[4])
-            except:
-                raise ValueError(f"Error parsing NMR shielding output line:\n{line}")
-            shieldings.append(shielding)
+            # there are 8 on each line but we truncate the first 2 in the block selection process
+            if len(fields) == 6 and fields[0] == "Isotropic" and fields[3] == "Anisotropy":
+                fields = line.split()
+                assert len(fields) == 6, f"Expected 6 fields on an NMR shielding output line but found {len(fields)} instead!"
+                try:
+                    shielding = float(fields[2])
+                except:
+                    raise ValueError(f"Error parsing NMR shielding output line:\n{line}")
+                shieldings.append(shielding)
+
+        # yes, this is very elegant.
+        tensor[0][0] = float(re.search("XX=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
+        tensor[0][1] = float(re.search("XY=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
+        tensor[0][2] = float(re.search("XZ=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
+        tensor[1][0] = float(re.search("YX=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
+        tensor[1][1] = float(re.search("YY=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
+        tensor[1][2] = float(re.search("YZ=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
+        tensor[2][0] = float(re.search("ZX=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
+        tensor[2][1] = float(re.search("ZY=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
+        tensor[2][2] = float(re.search("ZZ=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
+        tensors.append(tensor)
 
     if len(shieldings) is not 0:
         assert len(shieldings) == num_atoms, f"Expected {num_atoms} shieldings but found {len(shieldings)}!"
-        return np.asarray(shieldings)
+        return np.asarray(shieldings), tensors
     else:
         #### we can catch this problem later if the file is finished
-        return None
+        return None, None
 
 def split_link1(filename):
     """
@@ -106,6 +124,7 @@ def read_file_fast(file_text, filename, link1idx, max_len=10000, extended_opt_in
         ["Mulliken charges", "Sum of Mulliken charges", 1],
         ["Electronic spatial extent", "Quadrupole moment", 1], #10
         ["normal coordinates", "Thermochemistry", 1],
+        ["Isotropic", "Eigenvalues", 1000],
     ]
 
     word_matches = [[] for _ in words]
@@ -289,9 +308,10 @@ def read_file_fast(file_text, filename, link1idx, max_len=10000, extended_opt_in
             properties[-1]["quasiharmonic_gibbs_free_energy"] = float(corrected_free_energy)
 
     if cctk.GaussianJobType.NMR in job_types:
-        nmr_shifts = read_nmr_shifts(word_matches[16], molecules[0].num_atoms())
+        nmr_shifts, shielding_tensors = read_nmr_shifts(block_matches[12], molecules[0].num_atoms())
         if nmr_shifts is not None:
             properties[-1]["isotropic_shielding"] = nmr_shifts.view(cctk.OneIndexedArray)
+            properties[-1]["shielding_tensors"] = shielding_tensors
 
         if re.search("nmr=mixed", f.route_card, flags=re.IGNORECASE) or re.search("nmr=spinspin", f.route_card,flags=re.IGNORECASE):
             couplings = read_j_couplings(block_matches[6], molecules[0].num_atoms())
@@ -571,3 +591,75 @@ def parse_modes(freq_block, num_atoms, hpmodes=False):
         modes.append(cctk.VibrationalMode(frequency=f, reduced_mass=m, force_constant=k, displacements=d))
 
     return modes
+
+def read_j_couplings(lines, n_atoms):
+    """
+    Helper method to search through output file and read J couplings
+    Args:
+        lines (list): list of lines in file
+        n_atoms (int): how many atoms are in the molecule
+    Returns:
+        ``couplings`` symmetric 2D np.array of couplings (in Hz) with zero-indexed atoms on both axes
+        or None if no couplings were found
+    """
+    couplings = np.zeros((n_atoms,n_atoms))
+    n_full_blocks, lines_in_partial_block = divmod(n_atoms,5)
+    n_lines = 5 * (n_full_blocks * (n_full_blocks+1) / 2) + n_full_blocks + 1
+    if lines_in_partial_block > 0:
+        n_lines += 1 + lines_in_partial_block
+    n_lines = int(n_lines)
+
+    lines = lines[0].split("\n")
+
+    i = 0
+    read_column_indices = False
+    read_row = False
+    this_column_indices = []
+    while i < n_lines:
+        # get current line
+        line = lines[i]
+
+        # if this is the header, we should be reading the column indices next
+        if "Total nuclear spin-spin coupling J (Hz):" in line:
+            i += 1
+            read_column_indices = True
+            continue
+
+        # this is not the header, so split the fields
+        fields = line.split()
+
+        # read the column indices
+        if read_column_indices:
+#            this_n_columns = len(fields)
+            this_column_indices = [ int(j)-1 for j in fields ]
+            i += 1
+            read_column_indices = False
+            read_row = True
+            continue
+        elif read_row:
+            row = int(fields[0])-1
+            for j,value in enumerate(fields[1:]):
+                column = this_column_indices[j]
+                value = value.replace("D","E")
+                value = float(value)
+                couplings[row,column] = value
+                couplings[column,row] = value
+
+            # check if we have read the entire matrix
+            if row == n_atoms - 1 and column == n_atoms - 1:
+                break
+
+            # check if this is the end of the current block
+            if row == n_atoms - 1:
+                read_column_indices = True
+                read_row = False
+                i += 1
+                continue
+
+            read_row = True
+            i += 1
+            continue
+        else:
+            raise ValueError("impossible")
+
+    return couplings
diff --git a/docs/recipe_01.rst b/docs/recipe_01.rst
@@ -151,7 +151,7 @@ Using Custom Basis Sets from the Basis Set Exchange
 
 """""""""""""""""""""""""""
 Creating Molecules By Name
-""""""""""""""""""""""""""
+"""""""""""""""""""""""""""
 
 - If ``rdkit`` is installed, then molecules can be created from a name or SMILES string. Structures should be checked for sanity!
 

diff --git a/docs/recipe_06.rst b/docs/recipe_06.rst
@@ -8,7 +8,6 @@ NMR Spectroscopy
 Chemical Shifts
 """""""""""""""
 
-- ``import cctk`` is assumed.
 - Isotropic shieldings are read automatically.
 - The ``scale_nmr_shifts`` function will apply scaling factors to the shieldings
   to produce chemical shift predictions.
@@ -107,4 +106,23 @@ Coupling Constants
     self.assertTrue(np.any(expected_couplings-couplings < 0.1))
 
 
+""""""""""""""""""
+Shielding Tensors
+""""""""""""""""""
+
+- The 3 by 3 shielding tensors are stored in ``shielding_tensors`` in ``properties_dict``.
+- Each tensor is a ``np.ndarray`` (0-indexed), stored in a 0-indexed list.
+
+::
+
+    gaussian_file = cctk.GaussianFile.read_file("test/static/methane.out")
+    ensemble = gaussian_file.ensemble
+    shielding_tensors = ensemble[-1,"shielding_tensors"]
+    print(shielding_tensors(1))
+
+    # will print
+    # array([[32.6869,  3.0191,  3.0191],
+    #   [ 3.0191, 32.6869,  3.0191],
+    #   [ 3.0191,  3.0191, 32.6869]]
+
 
diff --git a/docs/recipe_08.rst b/docs/recipe_08.rst
@@ -99,7 +99,7 @@ Epimerization
 
 """""""""""""""""
 Detecting Groups
-""""""""""""""""
+"""""""""""""""""
 
 - *cctk* can search within existing molecules for groups and return the relevant atom numbers.
 

diff --git a/test/test_nmr.py b/test/test_nmr.py
@@ -21,6 +21,10 @@ def test_nmr1(self):
         shieldings = ensemble[:,"isotropic_shielding"]
         self.assertListEqual(list(shieldings), [198.2259, 32.6869, 32.6869, 32.6869, 32.6869])
 
+        tensors = ensemble[:, "shielding_tensors"]
+        self.assertEqual(tensors[0][0][0], 198.2259)
+        self.assertEqual(tensors[1][0][0], 32.6869)
+
     def test_nmr2(self):
         # this file contains opt freq followed by Link1 NMR on methane
         gaussian_file = cctk.GaussianFile.read_file("test/static/methane2.out")