From 17b48ab9731612a7c2810555948019e0920bd36b Mon Sep 17 00:00:00 2001 From: rdguha1995 Date: Tue, 6 Feb 2024 16:56:18 -0800 Subject: [PATCH] Resolving Issues #929 and #930 (#932) * Adding qc_tasks and calculation.py * big_commit for implementing the drone functionality of atomate(qchem) in emmet * ran pre-commit locally and some minor changes * writing_unit_tests_for_sp_and_opt * ran pre-commit on test files * corrected Union type error * added numpy custom validators * datetime import problem * allowing arbitrary types * further tests * further tests * change in io file convention * checking qcinput * checking qcinput * Incorporating all the pydantic 2 changes * changes in lot, task_type, calc_type * removing circular dependency * calc_doc issue * Make more fields optional in accordance with pydantic 2 * Corrected the Input Doc problems * CalcInput smx attribute issue * correcting input to qcinput and qcoutput * changes in the Optimization test doc for inputs * molecule -> initial_molecule * changes to the sp valid task_schema * test_output breakdowns * test_output breakdowns OutputDoc * test_output breakdowns OutputDoc * test_output breakdowns OutputDoc * test_output breakdowns OutputDoc * test_output breakdowns OutputDoc * test_output breakdowns OutputDoc * Changes to the TaskDoc * Changes to the TaskDoc np.array * Changes to the conftest * Changes to the conftest arrays * Changes to the conftest arrays * Changes to test code * Changes to test code * Changes to test code * Changes to test code * Changes to test code * Changes to test code * Changes to test code * Changes to test code * Changes to test code * Changes to test code * fixing bug where solvent field was being accessed as a dict * forgot pre-commit * Changed the default args for initial_molecule and optimized_molecule to be Molecule not dict * deleted the superfluous FW files * making the TaskDoc.from_directory functionality generalized to handle qchem calculation directories not generated through 
atomate * resolved the bugs with enthalpy, entropy and parsing frequencies --- emmet-core/emmet/core/qc_tasks.py | 24 +++++++---- emmet-core/emmet/core/qchem/calculation.py | 46 ++++++++++++++++------ 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/emmet-core/emmet/core/qc_tasks.py b/emmet-core/emmet/core/qc_tasks.py index 074723042d..b06b4a216c 100644 --- a/emmet-core/emmet/core/qc_tasks.py +++ b/emmet-core/emmet/core/qc_tasks.py @@ -76,9 +76,14 @@ class OutputDoc(BaseModel): None, description="Natural Bonding Orbital (NBO) output" ) + frequencies: Optional[Union[Dict[str, Any], List]] = Field( + None, + description="The list of calculated frequencies if job type is freq (units: cm^-1)", + ) + frequency_modes: Optional[Union[List, str]] = Field( None, - description="The list of calculated frequency mode vectors if job type is freq (units: cm^-1)", + description="The list of calculated frequency mode vectors if job type is freq", ) @classmethod @@ -112,6 +117,7 @@ def from_qchem_calc_doc(cls, calc_doc: Calculation) -> "OutputDoc": resp=calc_doc.output.resp, nbo=calc_doc.output.nbo_data, frequencies=calc_doc.output.frequencies, + frequency_modes=calc_doc.output.frequency_modes, ) @@ -588,7 +594,9 @@ def _find_qchem_files( ) in_task_name = in_task_name or "mol.qin" if in_task_name == "orig": - task_files[in_task_name] = {"orig_input_file": file} + task_files[in_task_name] = {"orig_input_file": file.name} + elif in_task_name == "last": + continue elif in_task_name == "mol.qin" or in_task_name == "mol.in": if in_task_name == "mol.qin": out_file = ( @@ -603,19 +611,21 @@ def _find_qchem_files( else path / "mol.out" ) task_files["standard"] = { - "qcinput_file": file, - "qcoutput_file": out_file, + "qcinput_file": file.name, + "qcoutput_file": out_file.name, } # This block will exist only if calcs were run through atomate else: try: task_files[in_task_name] = { - "qcinput_file": file, - "qcoutput_file": Path("mol.qout." 
+ in_task_name + ".gz"), + "qcinput_file": file.name, + "qcoutput_file": Path( + "mol.qout." + in_task_name + ".gz" + ).name, } except FileNotFoundError: task_files[in_task_name] = { - "qcinput_file": file, + "qcinput_file": file.name, "qcoutput_file": "No qout files exist for this in file", } diff --git a/emmet-core/emmet/core/qchem/calculation.py b/emmet-core/emmet/core/qchem/calculation.py index 28d5acd8fb..00df5b30c9 100644 --- a/emmet-core/emmet/core/qchem/calculation.py +++ b/emmet-core/emmet/core/qchem/calculation.py @@ -274,8 +274,8 @@ def from_qcoutput(cls, qcoutput: QCOutput) -> "CalculationOutput": frequencies=qcoutput.data.get("frequencies", {}), frequency_modes=qcoutput.data.get("frequency_mode_vectors", []), final_energy=qcoutput.data.get("final_energy", None), - enthalpy=qcoutput.data.get("enthalpy", None), - entropy=qcoutput.data.get("entropy", None), + enthalpy=qcoutput.data.get("total_enthalpy", None), + entropy=qcoutput.data.get("total_entropy", None), scan_energies=qcoutput.data.get("scan_energies", {}), scan_geometries=qcoutput.data.get("optimized_geometries", {}), scan_molecules=qcoutput.data.get("molecules_from_optimized_geometries", {}), @@ -448,29 +448,53 @@ def _find_qchem_files( path = Path(path) task_files = OrderedDict() - in_file_pattern = re.compile(r"^(?P<in_task_name>mol\.qin(?:\..+)?)\.gz$") + in_file_pattern = re.compile(r"^(?P<in_task_name>mol\.(qin|in)(?:\..+)?)(\.gz)?$") for file in path.iterdir(): if file.is_file(): in_match = in_file_pattern.match(file.name) + + # This block is for generalizing outputs coming from both atomate and manual qchem calculations if in_match: - in_task_name = in_match.group("in_task_name").replace("mol.qin.", "") + in_task_name = re.sub( + r"(\.gz|gz)$", + "", + in_match.group("in_task_name").replace("mol.qin.", ""), + ) + in_task_name = in_task_name or "mol.qin" if in_task_name == "orig": - task_files[in_task_name] = {"orig_input_file": file} - elif in_task_name == "mol.qin": + task_files[in_task_name] = {"orig_input_file": 
file.name} + elif in_task_name == "last": + continue + elif in_task_name == "mol.qin" or in_task_name == "mol.in": + if in_task_name == "mol.qin": + out_file = ( + path / "mol.qout.gz" + if (path / "mol.qout.gz").exists() + else path / "mol.qout" + ) + else: + out_file = ( + path / "mol.out.gz" + if (path / "mol.out.gz").exists() + else path / "mol.out" + ) task_files["standard"] = { - "qcinput_file": file, - "qcoutput_file": Path("mol.qout.gz"), + "qcinput_file": file.name, + "qcoutput_file": out_file.name, } + # This block will exist only if calcs were run through atomate else: try: task_files[in_task_name] = { - "qcinput_file": file, - "qcoutput_file": Path("mol.qout." + in_task_name + ".gz"), + "qcinput_file": file.name, + "qcoutput_file": Path( + "mol.qout." + in_task_name + ".gz" + ).name, } except FileNotFoundError: task_files[in_task_name] = { - "qcinput_file": file, + "qcinput_file": file.name, "qcoutput_file": "No qout files exist for this in file", }