diff --git a/cctk/ensemble.py b/cctk/ensemble.py index fee0483..f2181b4 100644 --- a/cctk/ensemble.py +++ b/cctk/ensemble.py @@ -596,3 +596,16 @@ def get_geometric_parameters(self, parameter, atom1, atom2, atom3=None, atom4=No return output + def assign_connectivity(self, index=0): + """ + Assigns connectivity for all molecules based on molecule of index ``index``. Much faster than assigning connectivity for each individually -- but assumes all bonding is the same. + """ + assert isinstance(index, int), "Need integer index" + bonds = self.molecules[index].assign_connectivity().bonds + + for mol in self.molecules: + mol.bonds = bonds + + return self + + diff --git a/cctk/molecule.py b/cctk/molecule.py index 19bdafa..d69e2da 100644 --- a/cctk/molecule.py +++ b/cctk/molecule.py @@ -35,7 +35,6 @@ class Molecule: bonds (nx.Graph or list of tuples): connectivity graph or list of 2-tuples, with each element representing the 1-indexed atom number of a bonded pair charge (int): the charge of the molecule multiplicity (int): the spin state of the molecule (1 corresponds to singlet, 2 to doublet, 3 to triplet, etc. -- so a multiplicity of 1 is equivalent to S=0) - checks (bool): whether to check that the constructor parameters are valid vibrational_modes (list of cctk.VibrationalMode): vibrational modes """ @@ -44,6 +43,9 @@ def __init__(self, atomic_numbers, geometry, name=None, bonds=None, charge=0, mu Create new Molecule object, and assign connectivity if needed. ``bonds`` must be a list of edges (i.e. an n x 2 ``numpy`` array). + + If ``checks`` is True, the atomic numbers in bonds will all be checked for consistency. + This option can be disabled by setting ``checks`` to False, but this is not recommended for external data. """ if len(atomic_numbers) != len(geometry): raise ValueError(f"length of geometry ({len(geometry)}) and atomic_numbers ({len(atomic_numbers)}) does not match!\n{atomic_numbers}\n{geometry}") @@ -1665,3 +1667,12 @@ def csearch(self, nprocs=1, constraints=[], logfile=None, noncovalent=False, use import cctk.optimize as opt assert isinstance(nprocs, int), "nprocs must be int!" return opt.csearch(molecule=self, nprocs=nprocs, constraints=constraints, noncovalent=noncovalent, logfile=logfile, use_tempdir=use_tempdir) + + def num_neighbors_by_atom(self): + """ + Returns a list of the number of neighbors of each atom. + """ + result = [] + for i in range(self.num_atoms()): + result.append(len(self.get_adjacent_atoms(i))) + return result diff --git a/cctk/parse_gaussian.py b/cctk/parse_gaussian.py index f77d81e..c3584c6 100644 --- a/cctk/parse_gaussian.py +++ b/cctk/parse_gaussian.py @@ -11,7 +11,7 @@ Functions to help with parsing Gaussian files """ -def read_nmr_shifts(lines, num_atoms): +def read_nmr_shifts(blocks, num_atoms): """ Helper method to search through output file and read NMR shifts. Args: @@ -19,26 +19,44 @@ def read_nmr_shifts(lines, num_atoms): num_atoms (int): number of atoms expected Returns: list of isotropic NMR shifts (np.ndarray) + list of shielding tensors (list of 3x3 np.ndarray) """ # assumes that lines only come from one Link1 section shieldings = [] - for line in lines: - fields = line.split() - if len(fields) == 8 and fields[2] == "Isotropic" and fields[5] == "Anisotropy": + tensors = [] + for block in blocks: + lines = block.split("\n") + tensor = np.zeros(shape=(3,3)) + for line in lines: fields = line.split() - assert len(fields) == 8, f"Expected 8 fields on an NMR shielding output line but found {len(fields)} instead!" - try: - shielding = float(fields[4]) - except: - raise ValueError(f"Error parsing NMR shielding output line:\n{line}") - shieldings.append(shielding) + # there are 8 on each line but we truncate the first 2 in the block selection process + if len(fields) == 6 and fields[0] == "Isotropic" and fields[3] == "Anisotropy": + fields = line.split() + assert len(fields) == 6, f"Expected 6 fields on an NMR shielding output line but found {len(fields)} instead!" + try: + shielding = float(fields[2]) + except: + raise ValueError(f"Error parsing NMR shielding output line:\n{line}") + shieldings.append(shielding) + + # yes, this is very elegant. + tensor[0][0] = float(re.search("XX=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[0][1] = float(re.search("XY=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[0][2] = float(re.search("XZ=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[1][0] = float(re.search("YX=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[1][1] = float(re.search("YY=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[1][2] = float(re.search("YZ=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[2][0] = float(re.search("ZX=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[2][1] = float(re.search("ZY=\s+(?P-?\d+\.\d+)", block).group("val")) + tensor[2][2] = float(re.search("ZZ=\s+(?P-?\d+\.\d+)", block).group("val")) + tensors.append(tensor) if len(shieldings) is not 0: assert len(shieldings) == num_atoms, f"Expected {num_atoms} shieldings but found {len(shieldings)}!" - return np.asarray(shieldings) + return np.asarray(shieldings), tensors else: #### we can catch this problem later if the file is finished - return None + return None, None def split_link1(filename): """ @@ -106,6 +124,7 @@ def read_file_fast(file_text, filename, link1idx, max_len=10000, extended_opt_in ["Mulliken charges", "Sum of Mulliken charges", 1], ["Electronic spatial extent", "Quadrupole moment", 1], #10 ["normal coordinates", "Thermochemistry", 1], + ["Isotropic", "Eigenvalues", 1000], ] word_matches = [[] for _ in words] @@ -289,9 +308,10 @@ def read_file_fast(file_text, filename, link1idx, max_len=10000, extended_opt_in properties[-1]["quasiharmonic_gibbs_free_energy"] = float(corrected_free_energy) if cctk.GaussianJobType.NMR in job_types: - nmr_shifts = read_nmr_shifts(word_matches[16], molecules[0].num_atoms()) + nmr_shifts, shielding_tensors = read_nmr_shifts(block_matches[12], molecules[0].num_atoms()) if nmr_shifts is not None: properties[-1]["isotropic_shielding"] = nmr_shifts.view(cctk.OneIndexedArray) + properties[-1]["shielding_tensors"] = shielding_tensors if re.search("nmr=mixed", f.route_card, flags=re.IGNORECASE) or re.search("nmr=spinspin", f.route_card,flags=re.IGNORECASE): couplings = read_j_couplings(block_matches[6], molecules[0].num_atoms()) @@ -304,7 +324,7 @@ def read_file_fast(file_text, filename, link1idx, max_len=10000, extended_opt_in properties[0]["forces"] = forces if cctk.GaussianJobType.POP in job_types: - if re.search("hirshfeld", f.route_card) or re.search("cm5", f.route_card): + if re.search("hirshfeld", f.route_card) or re.search("cm5", f.route_card) and len(block_matches[8]) > 0: charges, spins = parse_hirshfeld(block_matches[8]) properties[-1]["hirshfeld_charges"] = charges properties[-1]["hirshfeld_spins"] = spins @@ -467,6 +487,10 @@ def parse_charges_dipole(mulliken_block, dipole_block): def parse_hirshfeld(hirshfeld_block): charges = [] spins = [] + + if len(hirshfeld_block) == 0: + return None, None + for line in hirshfeld_block[0].split("\n")[2:]: fields = re.split(" +", line) fields = list(filter(None, fields)) @@ -567,3 +591,75 @@ def parse_modes(freq_block, num_atoms, hpmodes=False): modes.append(cctk.VibrationalMode(frequency=f, reduced_mass=m, force_constant=k, displacements=d)) return modes + +def read_j_couplings(lines, n_atoms): + """ + Helper method to search through output file and read J couplings + Args: + lines (list): list of lines in file + n_atoms (int): how many atoms are in the molecule + Returns: + ``couplings`` symmetric 2D np.array of couplings (in Hz) with zero-indexed atoms on both axes + or None if no couplings were found + """ + couplings = np.zeros((n_atoms,n_atoms)) + n_full_blocks, lines_in_partial_block = divmod(n_atoms,5) + n_lines = 5 * (n_full_blocks * (n_full_blocks+1) / 2) + n_full_blocks + 1 + if lines_in_partial_block > 0: + n_lines += 1 + lines_in_partial_block + n_lines = int(n_lines) + + lines = lines[0].split("\n") + + i = 0 + read_column_indices = False + read_row = False + this_column_indices = [] + while i < n_lines: + # get current line + line = lines[i] + + # if this is the header, we should be reading the column indices next + if "Total nuclear spin-spin coupling J (Hz):" in line: + i += 1 + read_column_indices = True + continue + + # this is not the header, so split the fields + fields = line.split() + + # read the column indices + if read_column_indices: +# this_n_columns = len(fields) + this_column_indices = [ int(j)-1 for j in fields ] + i += 1 + read_column_indices = False + read_row = True + continue + elif read_row: + row = int(fields[0])-1 + for j,value in enumerate(fields[1:]): + column = this_column_indices[j] + value = value.replace("D","E") + value = float(value) + couplings[row,column] = value + couplings[column,row] = value + + # check if we have read the entire matrix + if row == n_atoms - 1 and column == n_atoms - 1: + break + + # check if this is the end of the current block + if row == n_atoms - 1: + read_column_indices = True + read_row = False + i += 1 + continue + + read_row = True + i += 1 + continue + else: + raise ValueError("impossible") + + return couplings diff --git a/docs/recipe_01.rst b/docs/recipe_01.rst index 2f21efa..a01163f 100644 --- a/docs/recipe_01.rst +++ b/docs/recipe_01.rst @@ -151,7 +151,7 @@ Using Custom Basis Sets from the Basis Set Exchange """"""""""""""""""""""""""" Creating Molecules By Name -"""""""""""""""""""""""""" +""""""""""""""""""""""""""" - If ``rdkit`` is installed, then molecules can be created from a name or SMILES string. Structures should be checked for sanity! diff --git a/docs/recipe_06.rst b/docs/recipe_06.rst index 7206cfa..c26155b 100644 --- a/docs/recipe_06.rst +++ b/docs/recipe_06.rst @@ -8,7 +8,6 @@ NMR Spectroscopy Chemical Shifts """"""""""""""" -- ``import cctk`` is assumed. - Isotropic shieldings are read automatically. - The ``scale_nmr_shifts`` function will apply scaling factors to the shieldings to produce chemical shift predictions. @@ -107,4 +106,23 @@ Coupling Constants self.assertTrue(np.any(expected_couplings-couplings < 0.1)) +"""""""""""""""""" +Shielding Tensors +"""""""""""""""""" + +- The 3 by 3 shielding tensors are stored in ``shielding_tensors`` in ``properties_dict``. +- Each tensor is a ``np.ndarray`` (0-indexed), stored in a 0-indexed list. + +:: + + gaussian_file = cctk.GaussianFile.read_file("test/static/methane.out") + ensemble = gaussian_file.ensemble + shielding_tensors = ensemble[-1,"shielding_tensors"] + print(shielding_tensors(1)) + + # will print + # array([[32.6869, 3.0191, 3.0191], + # [ 3.0191, 32.6869, 3.0191], + # [ 3.0191, 3.0191, 32.6869]] + diff --git a/docs/recipe_08.rst b/docs/recipe_08.rst index 7492235..6eaf23c 100644 --- a/docs/recipe_08.rst +++ b/docs/recipe_08.rst @@ -99,7 +99,7 @@ Epimerization """"""""""""""""" Detecting Groups -"""""""""""""""" +""""""""""""""""" - *cctk* can search within existing molecules for groups and return the relevant atom numbers. diff --git a/scripts/analyze.py b/scripts/analyze.py index d5c34a0..d10a269 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -41,6 +41,7 @@ def main(): parser.add_argument("--h", action="store_true") parser.add_argument("--standard_state", action="store_true", default=False) parser.add_argument("--correct_gibbs", action="store_true", default=False) + parser.add_argument("--sort", action="store_true", default=False) parser.add_argument("filename") args = vars(parser.parse_args(sys.argv[1:])) @@ -83,12 +84,18 @@ def main(): if not isinstance(values[0], list): values = [values] - df = pd.DataFrame(values, columns=property_names).fillna("") + df = pd.DataFrame(values, columns=property_names) df.sort_values("filename", inplace=True) + if args["correct_gibbs"]: + df.rename(columns={"quasiharmonic_gibbs_free_energy": "gibbs_free_energy"}, inplace=True) + if args["g"]: - df["rel_energy"] = (df.quasiharmonic_gibbs_free_energy - df.quasiharmonic_gibbs_free_energy.min()) * 627.509469 + print(df.columns) + df = df.dropna(subset=["gibbs_free_energy"]) + df["rel_energy"] = (df.gibbs_free_energy - df.gibbs_free_energy.min()) * 627.509469 elif args["h"]: + df = df.dropna(subset=["enthalpy"]) df["rel_energy"] = (df.enthalpy - df.enthalpy.min()) * 627.509469 else: try: @@ -96,10 +103,11 @@ def main(): except: df["rel_energy"] = 0 - if args["correct_gibbs"]: - df.rename(columns={"rms_displacement": "rms_disp", "quasiharmonic_gibbs_free_energy": "GFE"}, inplace=True) - else: - df.rename(columns={"rms_displacement": "rms_disp", "gibbs_free_energy": "GFE"}, inplace=True) + if args["sort"]: + df.sort_values("rel_energy", inplace=True) + + df.fillna("") + df.rename(columns={"rms_displacement": "rms_disp", "gibbs_free_energy": "GFE"}, inplace=True) if args["standard_state"]: # convert to 1 M standard state = 6.354 e.u. * 298 K / 4184 J/kcal diff --git a/test/test_nmr.py b/test/test_nmr.py index 7f554a8..79fce89 100644 --- a/test/test_nmr.py +++ b/test/test_nmr.py @@ -21,6 +21,10 @@ def test_nmr1(self): shieldings = ensemble[:,"isotropic_shielding"] self.assertListEqual(list(shieldings), [198.2259, 32.6869, 32.6869, 32.6869, 32.6869]) + tensors = ensemble[:, "shielding_tensors"] + self.assertEqual(tensors[0][0][0], 198.2259) + self.assertEqual(tensors[1][0][0], 32.6869) + def test_nmr2(self): # this file contains opt freq followed by Link1 NMR on methane gaussian_file = cctk.GaussianFile.read_file("test/static/methane2.out")