Skip to content

Commit

Permalink
merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
corinwagen committed Oct 5, 2020
2 parents e8811b1 + 89f38d4 commit fa87090
Show file tree
Hide file tree
Showing 8 changed files with 174 additions and 24 deletions.
13 changes: 13 additions & 0 deletions cctk/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,3 +596,16 @@ def get_geometric_parameters(self, parameter, atom1, atom2, atom3=None, atom4=No

return output

def assign_connectivity(self, index=0):
"""
Assigns connectivity for all molecules based on molecule of index ``index``. Much faster than assigning connectivity for each individually -- but assumes all bonding is the same.
"""
assert isinstance(index, int), "Need integer index"
bonds = self.molecules[index].assign_connectivity().bonds

for mol in self.molecules:
mol.bonds = bonds

return self


13 changes: 12 additions & 1 deletion cctk/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ class Molecule:
bonds (nx.Graph or list of tuples): connectivity graph or list of 2-tuples, with each element representing the 1-indexed atom number of a bonded pair
charge (int): the charge of the molecule
multiplicity (int): the spin state of the molecule (1 corresponds to singlet, 2 to doublet, 3 to triplet, etc. -- so a multiplicity of 1 is equivalent to S=0)
checks (bool): whether to check that the constructor parameters are valid
vibrational_modes (list of cctk.VibrationalMode): vibrational modes
"""

Expand All @@ -44,6 +43,9 @@ def __init__(self, atomic_numbers, geometry, name=None, bonds=None, charge=0, mu
Create new Molecule object, and assign connectivity if needed.
``bonds`` must be a list of edges (i.e. an n x 2 ``numpy`` array).
If ``checks`` is True, the atomic numbers in bonds will all be checked for consistency.
This option can be disabled by setting ``checks`` to False, but this is not recommended for external data.
"""
if len(atomic_numbers) != len(geometry):
raise ValueError(f"length of geometry ({len(geometry)}) and atomic_numbers ({len(atomic_numbers)}) does not match!\n{atomic_numbers}\n{geometry}")
Expand Down Expand Up @@ -1665,3 +1667,12 @@ def csearch(self, nprocs=1, constraints=[], logfile=None, noncovalent=False, use
import cctk.optimize as opt
assert isinstance(nprocs, int), "nprocs must be int!"
return opt.csearch(molecule=self, nprocs=nprocs, constraints=constraints, noncovalent=noncovalent, logfile=logfile, use_tempdir=use_tempdir)

def num_neighbors_by_atom(self):
"""
Returns a list of the number of neighbors of each atom.
"""
result = []
for i in range(self.num_atoms()):
result.append(len(self.get_adjacent_atoms(i)))
return result
124 changes: 110 additions & 14 deletions cctk/parse_gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,34 +11,52 @@
Functions to help with parsing Gaussian files
"""

def read_nmr_shifts(lines, num_atoms):
def read_nmr_shifts(blocks, num_atoms):
"""
Helper method to search through output file and read NMR shifts.
Args:
lines (list): list of lines in file
num_atoms (int): number of atoms expected
Returns:
list of isotropic NMR shifts (np.ndarray)
list of shielding tensors (list of 3x3 np.ndarray)
"""
# assumes that lines only come from one Link1 section
shieldings = []
for line in lines:
fields = line.split()
if len(fields) == 8 and fields[2] == "Isotropic" and fields[5] == "Anisotropy":
tensors = []
for block in blocks:
lines = block.split("\n")
tensor = np.zeros(shape=(3,3))
for line in lines:
fields = line.split()
assert len(fields) == 8, f"Expected 8 fields on an NMR shielding output line but found {len(fields)} instead!"
try:
shielding = float(fields[4])
except:
raise ValueError(f"Error parsing NMR shielding output line:\n{line}")
shieldings.append(shielding)
# there are 8 on each line but we truncate the first 2 in the block selection process
if len(fields) == 6 and fields[0] == "Isotropic" and fields[3] == "Anisotropy":
fields = line.split()
assert len(fields) == 6, f"Expected 6 fields on an NMR shielding output line but found {len(fields)} instead!"
try:
shielding = float(fields[2])
except:
raise ValueError(f"Error parsing NMR shielding output line:\n{line}")
shieldings.append(shielding)

# yes, this is very elegant.
tensor[0][0] = float(re.search("XX=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
tensor[0][1] = float(re.search("XY=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
tensor[0][2] = float(re.search("XZ=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
tensor[1][0] = float(re.search("YX=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
tensor[1][1] = float(re.search("YY=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
tensor[1][2] = float(re.search("YZ=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
tensor[2][0] = float(re.search("ZX=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
tensor[2][1] = float(re.search("ZY=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
tensor[2][2] = float(re.search("ZZ=\s+(?P<val>-?\d+\.\d+)", block).group("val"))
tensors.append(tensor)

if len(shieldings) is not 0:
assert len(shieldings) == num_atoms, f"Expected {num_atoms} shieldings but found {len(shieldings)}!"
return np.asarray(shieldings)
return np.asarray(shieldings), tensors
else:
#### we can catch this problem later if the file is finished
return None
return None, None

def split_link1(filename):
"""
Expand Down Expand Up @@ -106,6 +124,7 @@ def read_file_fast(file_text, filename, link1idx, max_len=10000, extended_opt_in
["Mulliken charges", "Sum of Mulliken charges", 1],
["Electronic spatial extent", "Quadrupole moment", 1], #10
["normal coordinates", "Thermochemistry", 1],
["Isotropic", "Eigenvalues", 1000],
]

word_matches = [[] for _ in words]
Expand Down Expand Up @@ -289,9 +308,10 @@ def read_file_fast(file_text, filename, link1idx, max_len=10000, extended_opt_in
properties[-1]["quasiharmonic_gibbs_free_energy"] = float(corrected_free_energy)

if cctk.GaussianJobType.NMR in job_types:
nmr_shifts = read_nmr_shifts(word_matches[16], molecules[0].num_atoms())
nmr_shifts, shielding_tensors = read_nmr_shifts(block_matches[12], molecules[0].num_atoms())
if nmr_shifts is not None:
properties[-1]["isotropic_shielding"] = nmr_shifts.view(cctk.OneIndexedArray)
properties[-1]["shielding_tensors"] = shielding_tensors

if re.search("nmr=mixed", f.route_card, flags=re.IGNORECASE) or re.search("nmr=spinspin", f.route_card,flags=re.IGNORECASE):
couplings = read_j_couplings(block_matches[6], molecules[0].num_atoms())
Expand All @@ -304,7 +324,7 @@ def read_file_fast(file_text, filename, link1idx, max_len=10000, extended_opt_in
properties[0]["forces"] = forces

if cctk.GaussianJobType.POP in job_types:
if re.search("hirshfeld", f.route_card) or re.search("cm5", f.route_card):
if re.search("hirshfeld", f.route_card) or re.search("cm5", f.route_card) and len(block_matches[8]) > 0:
charges, spins = parse_hirshfeld(block_matches[8])
properties[-1]["hirshfeld_charges"] = charges
properties[-1]["hirshfeld_spins"] = spins
Expand Down Expand Up @@ -467,6 +487,10 @@ def parse_charges_dipole(mulliken_block, dipole_block):
def parse_hirshfeld(hirshfeld_block):
charges = []
spins = []

if len(hirshfeld_block) == 0:
return None, None

for line in hirshfeld_block[0].split("\n")[2:]:
fields = re.split(" +", line)
fields = list(filter(None, fields))
Expand Down Expand Up @@ -567,3 +591,75 @@ def parse_modes(freq_block, num_atoms, hpmodes=False):
modes.append(cctk.VibrationalMode(frequency=f, reduced_mass=m, force_constant=k, displacements=d))

return modes

def read_j_couplings(lines, n_atoms):
"""
Helper method to search through output file and read J couplings
Args:
lines (list): list of lines in file
n_atoms (int): how many atoms are in the molecule
Returns:
``couplings`` symmetric 2D np.array of couplings (in Hz) with zero-indexed atoms on both axes
or None if no couplings were found
"""
couplings = np.zeros((n_atoms,n_atoms))
n_full_blocks, lines_in_partial_block = divmod(n_atoms,5)
n_lines = 5 * (n_full_blocks * (n_full_blocks+1) / 2) + n_full_blocks + 1
if lines_in_partial_block > 0:
n_lines += 1 + lines_in_partial_block
n_lines = int(n_lines)

lines = lines[0].split("\n")

i = 0
read_column_indices = False
read_row = False
this_column_indices = []
while i < n_lines:
# get current line
line = lines[i]

# if this is the header, we should be reading the column indices next
if "Total nuclear spin-spin coupling J (Hz):" in line:
i += 1
read_column_indices = True
continue

# this is not the header, so split the fields
fields = line.split()

# read the column indices
if read_column_indices:
# this_n_columns = len(fields)
this_column_indices = [ int(j)-1 for j in fields ]
i += 1
read_column_indices = False
read_row = True
continue
elif read_row:
row = int(fields[0])-1
for j,value in enumerate(fields[1:]):
column = this_column_indices[j]
value = value.replace("D","E")
value = float(value)
couplings[row,column] = value
couplings[column,row] = value

# check if we have read the entire matrix
if row == n_atoms - 1 and column == n_atoms - 1:
break

# check if this is the end of the current block
if row == n_atoms - 1:
read_column_indices = True
read_row = False
i += 1
continue

read_row = True
i += 1
continue
else:
raise ValueError("impossible")

return couplings
2 changes: 1 addition & 1 deletion docs/recipe_01.rst
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ Using Custom Basis Sets from the Basis Set Exchange

"""""""""""""""""""""""""""
Creating Molecules By Name
""""""""""""""""""""""""""
"""""""""""""""""""""""""""

- If ``rdkit`` is installed, then molecules can be created from a name or SMILES string. Structures should be checked for sanity!

Expand Down
20 changes: 19 additions & 1 deletion docs/recipe_06.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ NMR Spectroscopy
Chemical Shifts
"""""""""""""""

- ``import cctk`` is assumed.
- Isotropic shieldings are read automatically.
- The ``scale_nmr_shifts`` function will apply scaling factors to the shieldings
to produce chemical shift predictions.
Expand Down Expand Up @@ -107,4 +106,23 @@ Coupling Constants
self.assertTrue(np.any(expected_couplings-couplings < 0.1))


""""""""""""""""""
Shielding Tensors
""""""""""""""""""

- The 3 by 3 shielding tensors are stored in ``shielding_tensors`` in ``properties_dict``.
- Each tensor is a ``np.ndarray`` (0-indexed), stored in a 0-indexed list.

::

gaussian_file = cctk.GaussianFile.read_file("test/static/methane.out")
ensemble = gaussian_file.ensemble
shielding_tensors = ensemble[-1,"shielding_tensors"]
print(shielding_tensors(1))

# will print
# array([[32.6869, 3.0191, 3.0191],
# [ 3.0191, 32.6869, 3.0191],
# [ 3.0191, 3.0191, 32.6869]]


2 changes: 1 addition & 1 deletion docs/recipe_08.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ Epimerization

"""""""""""""""""
Detecting Groups
""""""""""""""""
"""""""""""""""""

- *cctk* can search within existing molecules for groups and return the relevant atom numbers.

Expand Down
20 changes: 14 additions & 6 deletions scripts/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def main():
parser.add_argument("--h", action="store_true")
parser.add_argument("--standard_state", action="store_true", default=False)
parser.add_argument("--correct_gibbs", action="store_true", default=False)
parser.add_argument("--sort", action="store_true", default=False)
parser.add_argument("filename")
args = vars(parser.parse_args(sys.argv[1:]))

Expand Down Expand Up @@ -83,23 +84,30 @@ def main():
if not isinstance(values[0], list):
values = [values]

df = pd.DataFrame(values, columns=property_names).fillna("")
df = pd.DataFrame(values, columns=property_names)
df.sort_values("filename", inplace=True)

if args["correct_gibbs"]:
df.rename(columns={"quasiharmonic_gibbs_free_energy": "gibbs_free_energy"}, inplace=True)

if args["g"]:
df["rel_energy"] = (df.quasiharmonic_gibbs_free_energy - df.quasiharmonic_gibbs_free_energy.min()) * 627.509469
print(df.columns)
df = df.dropna(subset=["gibbs_free_energy"])
df["rel_energy"] = (df.gibbs_free_energy - df.gibbs_free_energy.min()) * 627.509469
elif args["h"]:
df = df.dropna(subset=["enthalpy"])
df["rel_energy"] = (df.enthalpy - df.enthalpy.min()) * 627.509469
else:
try:
df["rel_energy"] = (df.energy - df.energy.min()) * 627.509469
except:
df["rel_energy"] = 0

if args["correct_gibbs"]:
df.rename(columns={"rms_displacement": "rms_disp", "quasiharmonic_gibbs_free_energy": "GFE"}, inplace=True)
else:
df.rename(columns={"rms_displacement": "rms_disp", "gibbs_free_energy": "GFE"}, inplace=True)
if args["sort"]:
df.sort_values("rel_energy", inplace=True)

df.fillna("")
df.rename(columns={"rms_displacement": "rms_disp", "gibbs_free_energy": "GFE"}, inplace=True)

if args["standard_state"]:
# convert to 1 M standard state = 6.354 e.u. * 298 K / 4184 J/kcal
Expand Down
4 changes: 4 additions & 0 deletions test/test_nmr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ def test_nmr1(self):
shieldings = ensemble[:,"isotropic_shielding"]
self.assertListEqual(list(shieldings), [198.2259, 32.6869, 32.6869, 32.6869, 32.6869])

tensors = ensemble[:, "shielding_tensors"]
self.assertEqual(tensors[0][0][0], 198.2259)
self.assertEqual(tensors[1][0][0], 32.6869)

def test_nmr2(self):
# this file contains opt freq followed by Link1 NMR on methane
gaussian_file = cctk.GaussianFile.read_file("test/static/methane2.out")
Expand Down

0 comments on commit fa87090

Please sign in to comment.