diff --git a/emmet-core/emmet/core/qc_tasks.py b/emmet-core/emmet/core/qc_tasks.py index b06b4a216c..26b9c856c4 100644 --- a/emmet-core/emmet/core/qc_tasks.py +++ b/emmet-core/emmet/core/qc_tasks.py @@ -186,17 +186,26 @@ def from_qchem_calc_doc(cls, calc_doc: Calculation) -> "InputDoc": InputDoc A summary of the input molecule and corresponding calculation parameters """ + try: + lot_val = calc_doc.level_of_theory.value + except AttributeError: + lot_val = calc_doc.level_of_theory + + try: + ct_val = calc_doc.calc_type.value + except AttributeError: + ct_val = calc_doc.calc_type # TODO : modify this to get the different variables from the task doc. return cls( initial_molecule=calc_doc.input.initial_molecule, rem=calc_doc.input.rem, - level_of_theory=calc_doc.level_of_theory.value, + level_of_theory=lot_val, task_type=calc_doc.task_type.value, tags=calc_doc.input.tags, solvation_lot_info=calc_doc.solvation_lot_info, # special_run_type = calc_doc.input.special_run_type, # smiles = calc_doc.input.smiles, - calc_type=calc_doc.calc_type.value, + calc_type=ct_val, ) @@ -281,6 +290,7 @@ class TaskDoc(MoleculeMetadata): def from_directory( cls: Type[_T], dir_name: Union[Path, str], + validate_lot: bool = True, store_additional_json: bool = True, additional_fields: Dict[str, Any] = None, **qchem_calculation_kwargs, @@ -292,6 +302,9 @@ def from_directory( ---------- dir_name The path to the folder containing the calculation outputs. + validate_lot + Flag for matching the basis and functional with the list of functionals consistent with MPCules. + Defaults to True. Change to False if you want to create a TaskDoc with other basis sets and functionals. store_additional_json Whether to store additional json files in the calculation directory. additional_fields @@ -322,7 +335,11 @@ def from_directory( continue else: calc_doc = Calculation.from_qchem_files( - dir_name, task_name, **files, **qchem_calculation_kwargs + dir_name, + task_name, + **files, + **qchem_calculation_kwargs, + validate_lot=validate_lot, ) calcs_reversed.append(calc_doc) # all_qchem_objects.append(qchem_objects) diff --git a/emmet-core/emmet/core/qchem/calculation.py b/emmet-core/emmet/core/qchem/calculation.py index 00df5b30c9..b284ad0ba2 100644 --- a/emmet-core/emmet/core/qchem/calculation.py +++ b/emmet-core/emmet/core/qchem/calculation.py @@ -7,6 +7,7 @@ from typing import Any, Dict, List, Optional, Union import numpy as np +import warnings from pydantic import field_validator, BaseModel, Field, ConfigDict from datetime import datetime from pymatgen.io.qchem.inputs import QCInput @@ -316,7 +317,7 @@ class Calculation(BaseModel): None, description="Paths (relative to dir_name) of the QChem output files associated with this calculation", ) - level_of_theory: LevelOfTheory = Field( + level_of_theory: Union[LevelOfTheory, str] = Field( None, description="Levels of theory used for the QChem calculation: For instance, B97-D/6-31g*", ) @@ -328,7 +329,7 @@ class Calculation(BaseModel): None, description="Calculation task type like Single Point, Geometry Optimization. Frequency...", ) - calc_type: CalcType = Field( + calc_type: Union[CalcType, str] = Field( None, description="Combination dict of LOT + TaskType: B97-D/6-31g*/VACUUM Geometry Optimization", ) @@ -340,6 +341,7 @@ def from_qchem_files( task_name: str, qcinput_file: Union[Path, str], qcoutput_file: Union[Path, str], + validate_lot: bool = True, store_energy_trajectory: bool = False, qcinput_kwargs: Optional[Dict] = None, qcoutput_kwargs: Optional[Dict] = None, @@ -410,10 +412,10 @@ def from_qchem_files( else {k2: Path(v2) for k2, v2 in v.items()} for k, v in output_file_paths.items() }, - level_of_theory=level_of_theory(input_doc), - solvation_lot_info=lot_solvent_string(input_doc), + level_of_theory=level_of_theory(input_doc, validate_lot=validate_lot), + solvation_lot_info=lot_solvent_string(input_doc, validate_lot=validate_lot), task_type=task_type(input_doc), - calc_type=calc_type(input_doc), + calc_type=calc_type(input_doc, validate_lot=validate_lot), ) @@ -501,7 +503,9 @@ def _find_qchem_files( return task_files -def level_of_theory(parameters: CalculationInput) -> LevelOfTheory: +def level_of_theory( + parameters: CalculationInput, validate_lot: bool = True +) -> LevelOfTheory: """ Returns the level of theory for a calculation, @@ -532,19 +536,8 @@ def level_of_theory(parameters: CalculationInput) -> LevelOfTheory: basis_lower = basis_raw.lower() - functional = [f for f in FUNCTIONALS if f.lower() == funct_lower] - if not functional: - raise ValueError(f"Unexpected functional {funct_lower}!") - - functional = functional[0] - - basis = [b for b in BASIS_SETS if b.lower() == basis_lower] - if not basis: - raise ValueError(f"Unexpected basis set {basis_lower}!") - - basis = basis[0] - solvent_method = parameters.rem.get("solvent_method", "").lower() + if solvent_method == "": solvation = "VACUUM" elif solvent_method in ["pcm", "cosmo"]: @@ -560,12 +553,44 @@ def level_of_theory(parameters: CalculationInput) -> LevelOfTheory: else: raise ValueError(f"Unexpected implicit solvent method {solvent_method}!") - lot = f"{functional}/{basis}/{solvation}" + if validate_lot: + functional = [f for f in FUNCTIONALS if f.lower() == funct_lower] + if not functional: + raise ValueError(f"Unexpected functional {funct_lower}!") - return LevelOfTheory(lot) + functional = functional[0] + basis = [b for b in BASIS_SETS if b.lower() == basis_lower] + if not basis: + raise ValueError(f"Unexpected basis set {basis_lower}!") + + basis = basis[0] + + lot = f"{functional}/{basis}/{solvation}" + + return LevelOfTheory(lot) + else: + warnings.warn( + "User has turned the validate flag off." + "This can have downstream effects if the chosen functional and basis " + "is not in the available sets of MP employed functionals and the user" + "wants to include the TaskDoc in the MP infrastructure." + "Users should ignore this warning if their objective is just to create TaskDocs", + UserWarning, + stacklevel=2, + ) + functional = funct_lower + basis = basis_lower + lot = f"{functional}/{basis}/{solvation}" -def solvent(parameters: CalculationInput, custom_smd: Optional[str] = None) -> str: + return lot + + +def solvent( + parameters: CalculationInput, + validate_lot: bool = True, + custom_smd: Optional[str] = None, +) -> str: """ Returns the solvent used for this calculation. @@ -574,9 +599,11 @@ def solvent(parameters: CalculationInput, custom_smd: Optional[str] = None) -> s custom_smd: (Optional) string representing SMD parameters for a non-standard solvent """ - - lot = level_of_theory(parameters) - solvation = lot.value.split("/")[-1] + lot = level_of_theory(parameters, validate_lot=validate_lot) + if validate_lot: + solvation = lot.value.split("/")[-1] + else: + solvation = lot.split("/")[-1] if solvation == "PCM": # dielectric = float(parameters.get("solvent", {}).get("dielectric", 78.39)) @@ -631,7 +658,9 @@ def solvent(parameters: CalculationInput, custom_smd: Optional[str] = None) -> s def lot_solvent_string( - parameters: CalculationInput, custom_smd: Optional[str] = None + parameters: CalculationInput, + validate_lot: bool = True, + custom_smd: Optional[str] = None, ) -> str: """ Returns a string representation of the level of theory and solvent used for this calculation. @@ -641,9 +670,11 @@ def lot_solvent_string( custom_smd: (Optional) string representing SMD parameters for a non-standard solvent """ - - lot = level_of_theory(parameters).value - solv = solvent(parameters, custom_smd=custom_smd) + if validate_lot: + lot = level_of_theory(parameters, validate_lot=validate_lot).value + else: + lot = level_of_theory(parameters, validate_lot=validate_lot) + solv = solvent(parameters, custom_smd=custom_smd, validate_lot=validate_lot) return f"{lot}({solv})" @@ -670,7 +701,9 @@ def task_type( def calc_type( - parameters: CalculationInput, special_run_type: Optional[str] = None + parameters: CalculationInput, + validate_lot: bool = True, + special_run_type: Optional[str] = None, ) -> CalcType: """ Determines the calc type @@ -678,6 +711,10 @@ def calc_type( Args: parameters: CalculationInput parameters """ - rt = level_of_theory(parameters).value tt = task_type(parameters, special_run_type=special_run_type).value - return CalcType(f"{rt} {tt}") + if validate_lot: + rt = level_of_theory(parameters, validate_lot=validate_lot).value + return CalcType(f"{rt} {tt}") + else: + rt = level_of_theory(parameters, validate_lot=validate_lot) + return str(f"{rt} {tt}") diff --git a/emmet-core/tests/conftest_qchem.py b/emmet-core/tests/conftest_qchem.py index b98cb69188..878cb99574 100644 --- a/emmet-core/tests/conftest_qchem.py +++ b/emmet-core/tests/conftest_qchem.py @@ -145,7 +145,7 @@ class SinglePointTest(SchemaTestData): "level_of_theory": "wB97M-V/def2-QZVPPD/SMD", "task_type": "Single Point", "calc_type": "wB97M-V/def2-QZVPPD/SMD Single Point", - "solvation_lot_nfo": "wB97M-V/def2-QZVPPD/SMD(SOLVENT=WATER)", + "solvation_lot_info": "wB97M-V/def2-QZVPPD/SMD(SOLVENT=WATER)", }, "output": { "mulliken": [np.array([-0.713178, 0.357278, 0.3559])], @@ -301,7 +301,7 @@ class OptimizationTest(SchemaTestData): "level_of_theory": "wB97M-V/def2-SVPD/SMD", "task_type": "Geometry Optimization", "calc_type": "wB97M-V/def2-SVPD/SMD Geometry Optimization", - "solvation_lot_nfo": "wB97M-V/def2-SVPD/SMD(SOLVENT=WATER)", + "solvation_lot_info": "wB97M-V/def2-SVPD/SMD(SOLVENT=WATER)", }, "output": { "initial_molecule": { diff --git a/emmet-core/tests/test_qc_task.py b/emmet-core/tests/test_qc_task.py index d47c41a23d..036c4b7682 100644 --- a/emmet-core/tests/test_qc_task.py +++ b/emmet-core/tests/test_qc_task.py @@ -88,3 +88,33 @@ def test_task_doc(test_dir, object_name): # Test that additional_fields works test_doc = TaskDoc.from_directory(dir_name, additional_fields={"foo": "bar"}) assert test_doc.model_dump()["additional_fields"] == {"foo": "bar"} + + +@pytest.mark.parametrize( + "object_name", + [ + pytest.param("SinglePointTest", id="SinglePointTest"), + pytest.param("OptimizationTest", id="OptimizationTest"), + ], +) +def test_task_doc_val_flag(test_dir, object_name): + from monty.json import MontyDecoder, jsanitize + from emmet.core.qc_tasks import TaskDoc + + test_object = get_test_object(object_name) + dir_name = test_dir / "qchem" / test_object.folder + print(f"The test object is {test_object.task_doc}") + test_doc = TaskDoc.from_directory(dir_name, validate_lot=False) + assert_schemas_equal(test_doc, test_object.task_doc) + + # test document can be jsanitized + d = jsanitize(test_doc, strict=True, enum_values=True, allow_bson=True) + + # and decoded + MontyDecoder().process_decoded(d) + + # Test that additional_fields works + test_doc = TaskDoc.from_directory( + dir_name, validate_lot=False, additional_fields={"foo": "bar"} + ) + assert test_doc.model_dump()["additional_fields"] == {"foo": "bar"}