Skip to content

Commit

Permalink
Merge pull request #2562 from sudarshanv01/qchem_io_eigenvals
Browse files Browse the repository at this point in the history
Parsing the Fock matrix and eigenvalues from the QChem output file
  • Loading branch information
mkhorton authored Nov 1, 2022
2 parents 553dc23 + 15807db commit 03691a8
Show file tree
Hide file tree
Showing 7 changed files with 19,657 additions and 6 deletions.
144 changes: 139 additions & 5 deletions pymatgen/io/qchem/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,16 @@
from pymatgen.core import Molecule
from pymatgen.io.qchem.utils import (
process_parsed_coords,
process_parsed_fock_matrix,
read_matrix_pattern,
read_pattern,
read_table_pattern,
)

try:

have_babel = True
from openbabel import openbabel
except ImportError:
openbabel = None
have_babel = False


__author__ = "Samuel Blau, Brandon Wood, Shyam Dwaraknath, Evan Spotte-Smith"
Expand Down Expand Up @@ -111,6 +111,17 @@ def __init__(self, filename: str):
terminate_on_match=True,
).get("key")

# Get the value of scf_final_print in the output file
scf_final_print = read_pattern(
self.text,
{"key": r"scf_final_print\s*=\s*(\d+)"},
terminate_on_match=True,
).get("key")
if scf_final_print is not None:
self.data["scf_final_print"] = int(scf_final_print[0][0])
else:
self.data["scf_final_print"] = 0

# Check if calculation uses GEN_SCFMAN, multiple potential output formats
self.data["using_GEN_SCFMAN"] = read_pattern(
self.text,
Expand Down Expand Up @@ -337,6 +348,15 @@ def __init__(self, filename: str):
if self.data.get("force_job", []):
self._read_force_data()

# Read in the eigenvalues from the output file
if self.data["scf_final_print"] >= 1:
self._read_eigenvalues()

# Read the Fock matrix from the output file
if self.data["scf_final_print"] >= 3:
self._read_fock_matrix()
self._read_coefficient_matrix()

# Check if the calculation is a PES scan. If so, parse the relevant output
self.data["scan_job"] = read_pattern(
self.text, {"key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*pes_scan"}, terminate_on_match=True
Expand Down Expand Up @@ -379,6 +399,120 @@ def multiple_outputs_from_file(cls, filename, keep_sub_files=True):
os.remove(filename + "." + str(i))
return to_return

def _read_eigenvalues(self):
"""Parse the orbital energies from the output file. An array of the
dimensions of the number of orbitals used in the calculation is stored."""

# Find the pattern corresponding to the "Final Alpha MO Eigenvalues" section
header_pattern = r"Final Alpha MO Eigenvalues"
# The elements of the matrix are always floats, they are surrounded by
# the index of the matrix, which are integers.
elements_pattern = r"\-*\d+\.\d+"
if not self.data.get("unrestricted", []):
spin_unrestricted = False
# Since this is a spin-paired calculation, there will be
# only a single list of orbital energies (denoted as alpha)
# in the input file. The footer will then correspond
# to the next section, which is the MO coefficients.
footer_pattern = r"Final Alpha MO Coefficients+\s*"
else:
spin_unrestricted = True
# It is an unrestricted calculation, so there will be two sets
# of eigenvalues. First parse the alpha eigenvalues.
footer_pattern = r"Final Beta MO Eigenvalues"
# Common for both spin restricted and unrestricted calculations is the alpha eigenvalues.
alpha_eigenvalues = read_matrix_pattern(
header_pattern, footer_pattern, elements_pattern, self.text, postprocess=float
)
# The beta eigenvalues are only present if this is a spin-unrestricted calculation.
if spin_unrestricted:
header_pattern = r"Final Beta MO Eigenvalues"
footer_pattern = r"Final Alpha MO Coefficients+\s*"
beta_eigenvalues = read_matrix_pattern(
header_pattern, footer_pattern, elements_pattern, self.text, postprocess=float
)

self.data["alpha_eigenvalues"] = alpha_eigenvalues

if spin_unrestricted:
self.data["beta_eigenvalues"] = beta_eigenvalues

def _read_fock_matrix(self):
"""Parses the Fock matrix. The matrix is read in whole
from the output file and then transformed into the right dimensions."""

# The header is the same for both spin-restricted and spin-unrestricted calculations.
header_pattern = r"Final Alpha Fock Matrix"
# The elements of the matrix are always floats, they are surrounded by
# the index of the matrix, which are integers
elements_pattern = r"\-*\d+\.\d+"
if not self.data.get("unrestricted", []):
spin_unrestricted = False
footer_pattern = "SCF time:"
else:
spin_unrestricted = True
footer_pattern = "Final Beta Fock Matrix"
# Common for both spin restricted and unrestricted calculations is the alpha Fock matrix.
alpha_fock_matrix = read_matrix_pattern(
header_pattern, footer_pattern, elements_pattern, self.text, postprocess=float
)
# The beta Fock matrix is only present if this is a spin-unrestricted calculation.
if spin_unrestricted:
header_pattern = r"Final Beta Fock Matrix"
footer_pattern = "SCF time:"
beta_fock_matrix = read_matrix_pattern(
header_pattern, footer_pattern, elements_pattern, self.text, postprocess=float
)

# Convert the matrices to the right dimension. Right now they are simply
# one massive list of numbers, but we need to split them into a matrix. The
# Fock matrix must always be a square matrix and as a result, we have to
# modify the dimensions.
alpha_fock_matrix = process_parsed_fock_matrix(alpha_fock_matrix)
self.data["alpha_fock_matrix"] = alpha_fock_matrix

if spin_unrestricted:
# Perform the same transformation for the beta Fock matrix.
beta_fock_matrix = process_parsed_fock_matrix(beta_fock_matrix)
self.data["beta_fock_matrix"] = beta_fock_matrix

def _read_coefficient_matrix(self):
"""Parses the coefficient matrix from the output file. Done is much
the same was as the Fock matrix."""
# The header is the same for both spin-restricted and spin-unrestricted calculations.
header_pattern = r"Final Alpha MO Coefficients"
# The elements of the matrix are always floats, they are surrounded by
# the index of the matrix, which are integers
elements_pattern = r"\-*\d+\.\d+"
if not self.data.get("unrestricted", []):
spin_unrestricted = False
footer_pattern = "Final Alpha Density Matrix"
else:
spin_unrestricted = True
footer_pattern = "Final Beta MO Coefficients"
# Common for both spin restricted and unrestricted calculations is the alpha Fock matrix.
alpha_coeff_matrix = read_matrix_pattern(
header_pattern, footer_pattern, elements_pattern, self.text, postprocess=float
)
if spin_unrestricted:
header_pattern = r"Final Beta MO Coefficients"
footer_pattern = "Final Alpha Density Matrix"
beta_coeff_matrix = read_matrix_pattern(
header_pattern, footer_pattern, elements_pattern, self.text, postprocess=float
)

# Convert the matrices to the right dimension. Right now they are simply
# one massive list of numbers, but we need to split them into a matrix. The
# Fock matrix must always be a square matrix and as a result, we have to
# modify the dimensions.
alpha_coeff_matrix = process_parsed_fock_matrix(alpha_coeff_matrix)
self.data["alpha_coeff_matrix"] = alpha_coeff_matrix

if spin_unrestricted:
# Perform the same transformation for the beta Fock matrix.
beta_coeff_matrix = process_parsed_fock_matrix(beta_coeff_matrix)
self.data["beta_coeff_matrix"] = beta_coeff_matrix

def _read_charge_and_multiplicity(self):
"""
Parses charge and multiplicity.
Expand Down Expand Up @@ -893,7 +1027,7 @@ def _read_optimization_data(self):
real_energy_trajectory[ii] = float(entry[0])
self.data["energy_trajectory"] = real_energy_trajectory
self._read_geometries()
if have_babel:
if openbabel is not None:
self.data["structure_change"] = check_for_structure_changes(
self.data["initial_molecule"],
self.data["molecule_from_last_geometry"],
Expand Down Expand Up @@ -1144,7 +1278,7 @@ def _read_scan_data(self):
self.data["energy_trajectory"] = real_energy_trajectory

self._read_geometries()
if have_babel:
if openbabel is not None:
self.data["structure_change"] = check_for_structure_changes(
self.data["initial_molecule"],
self.data["molecule_from_last_geometry"],
Expand Down
21 changes: 21 additions & 0 deletions pymatgen/io/qchem/sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def __init__(
new_geom_opt: dict | None = None,
overwrite_inputs: dict | None = None,
vdw_mode: Literal["atomic", "sequential"] = "atomic",
extra_scf_print: bool = False,
):
"""
Args:
Expand Down Expand Up @@ -124,6 +125,9 @@ def __init__(
In 'atomic' mode (default), dict keys represent the atomic number associated with each
radius (e.g., '12' = carbon). In 'sequential' mode, dict keys represent the sequential
position of a single specific atom in the input structure.
extra_scf_print (bool): Whether to store extra information generated from the SCF
cycle. If switched on, the Fock Matrix, coefficients of MO and the density matrix
will be stored.
"""
self.molecule = molecule
self.job_type = job_type
Expand All @@ -142,6 +146,7 @@ def __init__(
self.new_geom_opt = new_geom_opt
self.overwrite_inputs = overwrite_inputs
self.vdw_mode = vdw_mode
self.extra_scf_print = extra_scf_print

pcm_defaults = {
"heavypoints": "194",
Expand Down Expand Up @@ -303,6 +308,17 @@ def __init__(
for k, v in temp_opts.items():
myopt[k] = v

if extra_scf_print:
# Allow for the printing of the Fock matrix and the eigenvales
myrem["scf_final_print"] = "3"
# If extra_scf_print is specified, make sure that the convergence of the
# SCF cycle is at least 1e-8. Anything less than that might not be appropriate
# for printing out the Fock Matrix and coefficients of the MO.
if "scf_convergence" not in myrem:
myrem["scf_convergence"] = "8"
elif int(myrem["scf_convergence"]) < 8:
myrem["scf_convergence"] = "8"

super().__init__(
self.molecule,
rem=myrem,
Expand Down Expand Up @@ -348,6 +364,7 @@ def __init__(
nbo_params: dict | None = None,
overwrite_inputs: dict | None = None,
vdw_mode: Literal["atomic", "sequential"] = "atomic",
extra_scf_print: bool = False,
):
"""
Args:
Expand Down Expand Up @@ -403,6 +420,9 @@ def __init__(
In 'atomic' mode (default), dict keys represent the atomic number associated with each
radius (e.g., '12' = carbon). In 'sequential' mode, dict keys represent the sequential
position of a single specific atom in the input structure.
extra_scf_print (bool): Whether to store extra information generated from the SCF
cycle. If switched on, the Fock Matrix, coefficients of MO and the density matrix
will be stored.
"""
self.basis_set = basis_set
self.scf_algorithm = scf_algorithm
Expand All @@ -421,6 +441,7 @@ def __init__(
nbo_params=nbo_params,
overwrite_inputs=overwrite_inputs,
vdw_mode=vdw_mode,
extra_scf_print=extra_scf_print,
)


Expand Down
2 changes: 1 addition & 1 deletion pymatgen/io/qchem/tests/single_job.json

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions pymatgen/io/qchem/tests/test_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@
"ccsd_total_energy",
"ccsd(t)_correlation_energy",
"ccsd(t)_total_energy",
"alpha_fock_matrix",
"beta_fock_matrix",
"alpha_eigenvalues",
"beta_eigenvalues",
"alpha_coeff_matrix",
"beta_coeff_matrix",
}

if have_babel:
Expand Down Expand Up @@ -165,6 +171,7 @@
"new_qchem_files/ts.out",
"new_qchem_files/ccsd.qout",
"new_qchem_files/ccsdt.qout",
"extra_scf_print.qcout",
}

multi_job_out_names = {
Expand Down
26 changes: 26 additions & 0 deletions pymatgen/io/qchem/tests/test_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,32 @@ def test_init(self):
self.assertEqual(test_SPSet.solvent, {})
self.assertEqual(test_SPSet.molecule, test_molecule)

def test_scf_extra_print(self):
test_molecule = QCInput.from_file(os.path.join(test_dir, "new_qchem_files/pcm.qin")).molecule
test_SPSet = SinglePointSet(molecule=test_molecule, extra_scf_print=True)
self.assertEqual(
test_SPSet.rem,
{
"job_type": "sp",
"gen_scfman": "true",
"basis": "def2-tzvppd",
"max_scf_cycles": "100",
"method": "wb97xd",
"scf_algorithm": "diis",
"xc_grid": "3",
"thresh": "14",
"s2thresh": "16",
"symmetry": "false",
"sym_ignore": "true",
"resp_charges": "true",
"scf_convergence": "8",
"scf_final_print": "3",
},
)
self.assertEqual(test_SPSet.pcm, {})
self.assertEqual(test_SPSet.solvent, {})
self.assertEqual(test_SPSet.molecule, test_molecule)

def test_pcm_init(self):
test_molecule = QCInput.from_file(os.path.join(test_dir, "new_qchem_files/pcm.qin")).molecule
test_SPSet = SinglePointSet(molecule=test_molecule, pcm_dielectric=10.0)
Expand Down
48 changes: 48 additions & 0 deletions pymatgen/io/qchem/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,25 @@ def read_pattern(text_str, patterns, terminate_on_match=False, postprocess=str):
return matches


def read_matrix_pattern(header_pattern, footer_pattern, elements_pattern, text, postprocess=str):
"""Parse a matrix to get the quantities in a numpy array."""

# Get the piece of text between the header and the footer
header_regex = re.compile(header_pattern)
footer_regex = re.compile(footer_pattern)

# Find the text between the header and footer
text_between_header_and_footer = text[header_regex.search(text).end() : footer_regex.search(text).start()]

# Get the elements
elements = re.findall(elements_pattern, text_between_header_and_footer)

# Apply postprocessing to all the elements
elements = [postprocess(e) for e in elements]

return elements


def read_table_pattern(
text_str,
header_pattern,
Expand Down Expand Up @@ -153,3 +172,32 @@ def process_parsed_coords(coords):
for jj in range(3):
geometry[ii, jj] = float(entry[jj])
return geometry


def process_parsed_fock_matrix(fock_matrix):
"""The Fock matrix is parsed as a list, while it should actually be
a square matrix, this function takes the list of finds the right dimensions
in order to reshape the matrix."""
total_elements = len(fock_matrix)
n_rows = int(np.sqrt(total_elements))
n_cols = n_rows

# Q-Chem splits the printing of the matrix into chunks of 6 elements
# per line. TODO: Is there a better way than to hard-code this?
chunks = 6 * n_rows
# Decide the indices of the chunks
chunk_indices = np.arange(chunks, total_elements, chunks)
# Split the arrays into the chunks
fock_matrix_chunks = np.split(fock_matrix, chunk_indices)

# Reshape the chunks into the matrix and populate the matrix
fock_matrix_reshaped = np.zeros(shape=(n_rows, n_cols), dtype=float)
index_cols = 0
for fock_matrix_chunk in fock_matrix_chunks:
n_cols_chunks = len(fock_matrix_chunk) / n_rows
n_cols_chunks = int(n_cols_chunks)
fock_matrix_chunk_reshaped = np.reshape(fock_matrix_chunk, (n_rows, n_cols_chunks))
fock_matrix_reshaped[:, index_cols : index_cols + n_cols_chunks] = fock_matrix_chunk_reshaped
index_cols += n_cols_chunks

return fock_matrix_reshaped
Loading

0 comments on commit 03691a8

Please sign in to comment.