diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..44b8c21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +# Created by https://www.toptal.com/developers/gitignore/api/vscode +# Edit at https://www.toptal.com/developers/gitignore?templates=vscode + +### vscode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# End of https://www.toptal.com/developers/gitignore/api/vscode + +###pycache## +__pycache__/ + + +# packaging +*.egg-info/ diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..74aede3 --- /dev/null +++ b/README.rst @@ -0,0 +1,177 @@ +CENSO - Commandline ENergetic SOrting of Conformer Rotamer Ensembles +==================================================================== + +This repository hosts the `CENSO` code for the refinement of Conformer Rotamer +Ensembles (CRE) as obtained from `CREST`. + + +Installation +------------ + +There are several options possible. The easiest is to use the packaged censo programs +(by use of Pyinstaller) which can be found at the release section. The packaged +censo is linked against GLIBC version 2.19 and will work for GLIBC version 2.19 and above. + +Other options to use censo are shown below: + + +Download the git repository and run: + +.. code:: + + $ pip install --upgrade pip + $ pip install --editable . + $ censo arg1 arg2 + + +Flexible Invocation +------------------- + +1) Treating the censo directory as a package and as the main script:: + + $ python3 -m censo arg1 arg2 + +2) Using the censo-runner.py wrapper:: + + $ ./censo-runner.py arg1 arg2 + +3) After installation with pip:: + + $ censo arg1 arg2 + + + +Getting started: +---------------- + +Create the remote configuration file .censorc where the user can adjust default +settings and provide paths to the external programs e.g. `xtb`, `crest`, `orca` ... + +.. 
code:: + + $ censo -newconfig + $ cp censorc-new /home/$USER/.censorc + # edit .censorc + vi /home/$USER/.censorc + + +First explanations on the commandline arguments can be printed by: + +.. code:: + + $ censo --help + +The molecule numbering from the input structure ensemble is kept throughout the +entire program. There are several program parts which can be used to filter a structure +ensemble: + +0) Cheap prescreening (part0): Very fast DFT energies in order to improve upon the energy + description of the SQM method used to generate the input structure ensemble. + The (free) energies are evaluated on the input geometries (DFT unoptimized). + +1) Prescreening (part1): Improved DFT energies and accurate solvation energies (if needed). + The free energies are evaluated on the input geometries (DFT unoptimized). + +2) Optimization (part2): efficient structure ensemble optimization and + free energy calculation on DFT optimized geometries. + +3) Refinement (part3): Optional free energy refinement (on DFT optimized geometries). + +4) NMR properties (part4): Optional calculation of shielding and coupling constants on + populated conformers. + +5) Optical Rotation (part5): Optional calculation of optical rotatory dispersion + for the populated ensemble. + + +Usage: + +.. 
code:: + + # check if settings combinations match: + $ censo -inp structure_ensemble.xyz -part2 on -solvent h2o --checkinput + # start the calculation: + $ censo -inp structure_ensemble.xyz -part2 on -solvent h2o > censo.out 2> error.censo & + +Requirements: +------------- + +* newest xtb (currently: https://github.com/grimme-lab/xtb/releases/tag/bleed ) +* newest cefine https://github.com/grimme-lab/cefine/releases +* ORCA > version 4.1 + + +Further information (will be ordered later on): + +* the file .censorc can be used in the current working directory and will be preferred to + the global configuration file in ~/.censorc +* a folder ~/.censo_assets/ will be created upon usage of censo +* ORCA has not been used extensively so please be careful, test calculations + and report possible "bad" settings +* To be efficient COSMO-RS calculations are not performed with BP86 but with the functionals + for energy evaluation. + + + + +License +------- + +LGPL3 + + +Available solvation models: +--------------------------- + +Solvation models available for implicit effect on properties e.g. the +geometry (SM). And "additive" solvation models which return a solvation contribution +to free energy (Gibbs energy) of the chosen geometry (SMGSOLV). + +.. csv-table:: + :header: "programs", "solvation models", "comment" + + "Turbomole","COSMO", "(SM)" + "", "DCOSMO-RS","(SM)" + "COSMO-RS","COSMO-RS","(SMGSOLV) (only solvent model for evaluation at different temperatures)" + "ORCA", "CPCM", "(SM)" + "","SMD","(SM)" + "","SMD_GSOLV", "(SMGSOLV)" + "xTB","GBSA_Gsolv","(SMGSOLV)" + "","ALPB_Gsolv","(SMGSOLV)" + + + +For Turbomole user: +------------------- + +The amount of *ricore* for each calculation can be set in your `.cefinerc`. The same +holds for *maxcor* and/or *rpacor*. + +.. 
code:: + + $ echo "ricore 4000" > .cefinerc + $ echo "maxcor 4000" >> .cefinerc + $ echo "rpacor 4000" >> .cefinerc + + +Solvents: +--------- + +CENSO uses several QM-packages and not all solvents are available for all solvation +models throughout the QM-packages. +For this reason a user editable file is created in the folder: + + $ ~/.censo_assets/censo_solvents.json + +which contains a dictionary of all available solvent models and solvents. +If a solvent is not available with a certain solvent model, the user can then choose +a replacement solvent. E.g. if CCl4 is not available choose CHCl3. + +.. figure:: docs/src/solvents.png + :scale: 25% + :align: center + :alt: censo_solvents.json + + +The solvent file is directly used in `CENSO` and typos will cause calculations to crash! +Adding a new solvent is as easy as adding a new dictionary to the file. diff --git a/censo-runner.py b/censo-runner.py new file mode 100644 index 0000000..3a5a4d9 --- /dev/null +++ b/censo-runner.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 + +""" +Convenience wrapper for running censo directly from the source tree. 
+""" +import sys +from censo_qm.censo import main + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/censo_assets/censo_solvents.json b/censo_assets/censo_solvents.json new file mode 100644 index 0000000..0df1187 --- /dev/null +++ b/censo_assets/censo_solvents.json @@ -0,0 +1,218 @@ +{ + "acetone":{ + "cosmors": ["propanone_c0", "propanone_c0"], + "dcosmors": ["propanone", "propanone"], + "xtb": ["acetone", "acetone"], + "cpcm": ["acetone", "acetone"], + "smd": ["ACETONE", "ACETONE"], + "DC": 20.7 + }, + "chcl3":{ + "cosmors": ["chcl3_c0", "chcl3_c0"], + "dcosmors": ["chcl3", "chcl3"], + "xtb": ["chcl3", "chcl3"], + "cpcm": ["chloroform","chloroform"], + "smd": ["CHLOROFORM", "CHLOROFORM"], + "DC": 4.8 + }, + "acetonitrile":{ + "cosmors": ["acetonitrile_c0", "acetonitrile_c0"], + "dcosmors": ["acetonitrile", "acetonitrile"], + "xtb": ["acetonitrile", "acetonitrile"], + "cpcm": ["acetonitrile", "acetonitrile"], + "smd": ["ACETONITRILE", "ACETONITRILE"], + "DC": 36.6 + }, + "ch2cl2":{ + "cosmors": ["ch2cl2_c0", "ch2cl2_c0"], + "dcosmors": [null, "chcl3"], + "xtb": ["ch2cl2", "ch2cl2"], + "cpcm": ["CH2Cl2", "CH2Cl2"], + "smd": ["DICHLOROMETHANE", "DICHLOROMETHANE"], + "DC": 9.1 + }, + "dmso":{ + "cosmors": ["dimethylsulfoxide_c0", "dimethylsulfoxide_c0"], + "dcosmors": ["dimethylsulfoxide", "dimethylsulfoxide"], + "xtb": ["dmso", "dmso"], + "cpcm": ["DMSO", "DMSO"], + "smd": ["DIMETHYLSULFOXIDE", "DIMETHYLSULFOXIDE"], + "DC": 47.2 + }, + "h2o":{ + "cosmors": ["h2o_c0", "h2o_c0"], + "dcosmors": ["h2o", "h2o"], + "xtb": ["h2o", "h2o"], + "cpcm": ["Water", "Water"], + "smd": ["WATER", "WATER"], + "DC": 80.1 + }, + "methanol":{ + "cosmors": ["methanol_c0", "methanol_c0"], + "dcosmors": ["methanol", "methanol"], + "xtb": ["methanol", "methanol"], + "cpcm": ["Methanol", "Methanol"], + "smd": ["METHANOL", "METHANOL"], + "DC": 32.7 + }, + "thf":{ + "cosmors": ["thf_c0", "thf_c0"], + "dcosmors": ["thf", "thf"], + "xtb": ["thf", "thf"], + 
"cpcm": ["THF", "THF"], + "smd": ["TETRAHYDROFURAN", "TETRAHYDROFURAN"], + "DC": 7.6 + }, + "toluene":{ + "cosmors": ["toluene_c0", "toluene_c0"], + "dcosmors": ["toluene", "toluene"], + "xtb": ["toluene", "toluene"], + "cpcm": ["Toluene", "Toluene"], + "smd": ["TOLUENE", "TOLUENE"], + "DC": 2.4 + }, + "octanol":{ + "cosmors": ["1-octanol_c0", "1-octanol_c0"], + "dcosmors": ["octanol", "octanol"], + "xtb": ["octanol", "octanol"], + "cpcm": ["Octanol", "Octanol"], + "smd": ["1-OCTANOL", "1-OCTANOL"], + "DC": 9.9 + }, + "woctanol":{ + "cosmors": [null, "woctanol"], + "dcosmors": ["wet-otcanol", "wet-octanol"], + "xtb": ["woctanol", "woctanol"], + "cpcm": [null, "Octanol"], + "smd": [null, "1-OCTANOL"], + "DC": 8.1 + }, + "hexadecane":{ + "cosmors": ["n-hexadecane_c0", "n-hexadecane_c0"], + "dcosmors": ["hexadecane", "hexadecane"], + "xtb": ["hexadecane", "hexadecane"], + "cpcm": [null, "Hexane"], + "smd": ["N-HEXADECANE", "N-HEXADECANE"], + "DC": 2.1 + }, + "dmf":{ + "cosmors": ["dimethylformamide_c0","dimethylformamide_c0"], + "dcosmors": [null, "dimethylsulfoxide"], + "xtb": ["dmf", "dmf"], + "cpcm": ["DMF", "DMF"], + "smd": ["N,N-DIMETHYLFORMAMIDE", "N,N-DIMETHYLFORMAMIDE"], + "DC": 38.3 + }, + "aniline":{ + "cosmors": ["aniline_c0", "aniline_c0"], + "dcosmors": ["aniline", "aniline"], + "xtb": ["aniline", "aniline"], + "cpcm": [null,"Pyridine"], + "smd": ["ANILINE", "ANILINE"], + "DC": 6.9 + }, + "cyclohexane":{ + "cosmors": ["cyclohexane_c0", "cyclohexane_c0"], + "dcosmors": ["cyclohexane", "cyclohexane"], + "xtb": [null, "hexane"], + "cpcm": ["Cyclohexane", "Cyclohexane"], + "smd": ["CYCLOHEXANE", "CYCLOHEXANE"], + "DC": 2.0 + }, + "ccl4":{ + "cosmors": ["ccl4_c0", "ccl4_c0"], + "dcosmors": ["ccl4", "ccl4"], + "xtb": ["ccl4", "ccl4"], + "cpcm": ["CCl4", "CCl4"], + "smd": ["CARBON TETRACHLORIDE", "CARBON TETRACHLORIDE"], + "DC": 2.2 + }, + "diethylether":{ + "cosmors": ["diethylether_c0", "diethylether_c0"], + "dcosmors": ["diethylether", "diethylether"], + 
"xtb": ["ether", "ether"], + "cpcm": [null, "THF"], + "smd": ["DIETHYL ETHER", "DIETHYL ETHER"], + "DC": 4.4 + }, + "ethanol":{ + "cosmors": ["ethanol_c0", "ethanol_c0"], + "dcosmors": ["ethanol", "ethanol"], + "xtb": [null, "methanol"], + "cpcm": [null, "Methanol"], + "smd": ["ETHANOL", "ETHANOL"], + "DC": 24.6 + }, + "hexane":{ + "cosmors": ["hexane_c0", "hexane_c0"], + "dcosmors": ["hexane", "hexane"], + "xtb": ["hexane", "hexane"], + "cpcm": ["Hexane", "Hexane"], + "smd": ["N-HEXANE", "N-HEXANE"], + "DC": 1.9 + }, + "nitromethane":{ + "cosmors": ["nitromethane_c0", "nitromethane_c0"], + "dcosmors": ["nitromethane", "nitromethane"], + "xtb": ["nitromethane", "nitromethane"], + "cpcm": [null, "methanol"], + "smd": "", + "DC": 38.2 + }, + "benzaldehyde":{ + "cosmors": ["benzaldehyde_c0", "benzaldehyde_c0"], + "dcosmors": [null, "propanone"], + "xtb": ["benzaldehyde", "benzaldehyde"], + "cpcm": [null, "Pyridine"], + "smd": ["BENZALDEHYDE", "BENZALDEHYDE"], + "DC": 18.2 + }, + "benzene":{ + "cosmors": ["benzene_c0", "benzene_c0"], + "dcosmors": [null, "toluene"], + "xtb": ["benzene", "benzene"], + "cpcm": ["Benzene", "Benzene"], + "smd": ["BENZENE", "BENZENE"], + "DC": 2.3 + }, + "cs2":{ + "cosmors": ["cs2_c0", "cs2_c0"], + "dcosmors": [null, "ccl4"], + "xtb": ["cs2", "cs2"], + "cpcm": [null, "CCl4"], + "smd": ["CARBON DISULFIDE", "CARBON DISULFIDE"], + "DC": 2.6 + }, + "dioxane":{ + "cosmors": ["dioxane_c0", "dioxane_c0"], + "dcosmors": [null, "diethylether"], + "xtb": ["dioxane", "dioxane"], + "cpcm": [null, "Cyclohexane"], + "smd": ["1,4-DIOXANE", "1,4-DIOXANE"], + "DC": 2.2 + }, + "ethylacetate":{ + "cosmors": ["ethylacetate_c0", "ethylacetate_c0"], + "dcosmors": [null, "diethylether"], + "xtb": ["ethylacetate", "ethylacetate"], + "cpcm": [null, "THF"], + "smd": ["ETHYL ETHANOATE", "ETHYL ETHANOATE"], + "DC": 5.9 + }, + "furan":{ + "cosmors": ["furane_c0", "furane_c0"], + "dcosmors": [null, "diethylether"], + "xtb": ["furane", "furane"], + "cpcm": [null, "THF"], 
+ "smd": [null, "THF"], + "DC": 3.0 + }, + "phenol":{ + "cosmors": ["phenol_c0", "phenol_c0"], + "dcosmors": [null, "thf"], + "xtb": ["phenol", "phenol"], + "cpcm": [null, "THF"], + "smd": [null, "THIOPHENOL"], + "DC": 8.0 + } +} \ No newline at end of file diff --git a/censo_qm/__init__.py b/censo_qm/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/censo_qm/__main__.py b/censo_qm/__main__.py new file mode 100644 index 0000000..a446323 --- /dev/null +++ b/censo_qm/__main__.py @@ -0,0 +1,4 @@ +from .censo import main + +if __name__ == "__main__": + main() diff --git a/censo_qm/adf_job.py b/censo_qm/adf_job.py new file mode 100644 index 0000000..714b9d0 --- /dev/null +++ b/censo_qm/adf_job.py @@ -0,0 +1,3 @@ +# adf_job.py + +# for J and S calculation only \ No newline at end of file diff --git a/censo_qm/censo.py b/censo_qm/censo.py new file mode 100644 index 0000000..90217be --- /dev/null +++ b/censo_qm/censo.py @@ -0,0 +1,234 @@ +""" +CENSO run code: +- reading commandline input --> cml() +- parsing remote configuration file, reading conformer ensemble, + checking parameters and creating or reading enso.json conformer information + --> enso_startup() +- run cheap_screening --> part0() +- run prescreening --> part1() +- run optimization --> part2() +- run refinement --> part3() +- run nmrproperties --> part4() or +- run optical_rotation --> part5() +""" +from os import getcwd +from time import perf_counter +import sys +from traceback import print_exc +from .cfg import PLENGTH, DESCR, __version__ +from .inputhandling import cml, internal_settings +from .setupcenso import enso_startup +from .cheapscreening import part0 +from .prescreening import part1 +from .optimization import part2 +from .refinement import part3 +from .nmrproperties import part4 +from .opticalrotation import part5 +from .utilities import print + + +def main(argv=None): + """ + Execute the CENSO code. 
+ """ + # get commandline input: + args = cml(DESCR, internal_settings(), argv) + if args.version: + print(__version__) + sys.exit(0) + + # setup conformers and process input: cml >> conifgfile > internal defaults + args, config, conformers, ensembledata = enso_startup(getcwd(), args) + + # RUNNING PART0 + if config.part0: + tic = perf_counter() + try: + config, conformers, store_confs, ensembledata = part0( + config, conformers, ensembledata + ) + except Exception as error: + print("ERROR in part0!") + print("\nThe error-message is {}\n".format(error)) + print("Traceback for debugging:".center(PLENGTH, "*")) + print_exc() + print("".center(PLENGTH, "*")) + print("Going to exit!") + sys.exit(1) + toc = perf_counter() + ensembledata.part_info["part0"] = toc - tic + print(f"Ran part0 in {ensembledata.part_info['part0']:0.4f} seconds") + + # RUNNING PART1 + if config.part1: + tic = perf_counter() + try: + store_confs + except NameError: + store_confs = [] + try: + config, conformers, store_confs, ensembledata = part1( + config, conformers, store_confs, ensembledata + ) + except Exception as error: + print("ERROR in part1!") + print("\nThe error-message is {}\n".format(error)) + print("Traceback for debugging:".center(PLENGTH, "*")) + print_exc() + print("".center(PLENGTH, "*")) + print("Going to exit!") + sys.exit(1) + toc = perf_counter() + ensembledata.part_info["part1"] = toc - tic + print(f"Ran part1 in {ensembledata.part_info['part1']:0.4f} seconds") + + # RUNNING PART2 + if config.part2: + tic = perf_counter() + try: + store_confs + except NameError: + store_confs = [] + try: + config, conformers, store_confs, ensembledata = part2( + config, conformers, store_confs, ensembledata + ) + except Exception as error: + print("ERROR in part2!") + print("\nThe error-message is {}\n".format(error)) + print("Traceback for debugging:".center(PLENGTH, "*")) + print_exc() + print("".center(PLENGTH, "*")) + print("Going to exit!") + sys.exit(1) + toc = perf_counter() + 
ensembledata.part_info["part2"] = toc - tic + print(f"Ran part2 in {ensembledata.part_info['part2']:0.4f} seconds") + + # RUNNING PART3 + if config.part3: + tic = perf_counter() + try: + store_confs + except NameError: + store_confs = [] + try: + config, conformers, store_confs, ensembledata = part3( + config, conformers, store_confs, ensembledata + ) + except Exception as error: + print("ERROR in part3!") + print("\nThe error-message is {}\n".format(error)) + print("Traceback for debugging:".center(PLENGTH, "*")) + print_exc() + print("".center(PLENGTH, "*")) + print("Going to exit!") + sys.exit(1) + toc = perf_counter() + ensembledata.part_info["part3"] = toc - tic + print(f"Ran part3 in {ensembledata.part_info['part3']:0.4f} seconds") + + # RUNNING PART4 + if config.part4: + tic = perf_counter() + try: + store_confs + except NameError: + store_confs = [] + try: + config, conformers, store_confs, ensembledata = part4( + config, conformers, store_confs, ensembledata + ) + except Exception as error: + print("ERROR in part4!") + print("\nThe error-message is {}\n".format(error)) + print("Traceback for debugging:".center(PLENGTH, "*")) + print_exc() + print("".center(PLENGTH, "*")) + print("Going to exit!") + sys.exit(1) + toc = perf_counter() + ensembledata.part_info["part4"] = toc - tic + print(f"Ran part4 in {ensembledata.part_info['part4']:0.4f} seconds") + + # RUNNING PART5 + if config.optical_rotation: + tic = perf_counter() + try: + store_confs + except NameError: + store_confs = [] + try: + config, conformers, store_confs, ensembledata = part5( + config, conformers, store_confs, ensembledata + ) + except Exception as error: + print("ERROR in part5!") + print("\nThe error-message is {}\n".format(error)) + print("Traceback for debugging:".center(PLENGTH, "*")) + print_exc() + print("".center(PLENGTH, "*")) + print("Going to exit!") + sys.exit(1) + toc = perf_counter() + ensembledata.part_info["part5"] = toc - tic + print(f"Ran part5 in 
{ensembledata.part_info['part5']:0.4f} seconds") + + # save current data to jsonfile + config.write_json( + config.cwd, + [i.provide_runinfo() for i in conformers] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + + # END of CENSO + timings = 0.0 + if len(str(config.nconf)) > 5: + conflength = len(str(config.nconf)) + else: + conflength = 5 + + print(f"\n\n{'Part':20}: {'#conf':>{conflength}} time") + print("".ljust(int(PLENGTH / 2), "-")) + print(f"{'Input':20}: {ensembledata.nconfs_per_part['starting']:{conflength}} -") + if config.part0: + print( + f"{'Part0_all':20}: {ensembledata.nconfs_per_part['part0']:{conflength}} {ensembledata.part_info['part0']:.2f}s" + ) + timings += ensembledata.part_info["part0"] + if config.part1: + print( + f"{'Part1_initial_sort':20}: {ensembledata.nconfs_per_part['part1_firstsort']:{conflength}} -" + ) + print( + f"{'Part1_all':20}: {ensembledata.nconfs_per_part['part1_firstsort']:{conflength}} {ensembledata.part_info['part1']:.2f}s" + ) + timings += ensembledata.part_info["part1"] + if config.part2: + print( + f"{'Part2_opt':20}: {ensembledata.nconfs_per_part['part2_opt']:{conflength}} -" + ) + print( + f"{'Part2_all':20}: {ensembledata.nconfs_per_part['part2']:{conflength}} {ensembledata.part_info['part2']:.2f}s" + ) + timings += ensembledata.part_info["part2"] + if config.part3: + print( + f"{'Part3_all':20}: {ensembledata.nconfs_per_part['part3']:{conflength}} {ensembledata.part_info['part3']:.2f}s" + ) + timings += ensembledata.part_info["part3"] + if config.part4: + print( + f"{'Part4':20}: {'':{conflength}} {ensembledata.part_info['part4']:.2f}s" + ) + timings += ensembledata.part_info["part4"] + if config.optical_rotation: + print( + f"{'Part5':20}: {'':{conflength}} {ensembledata.part_info['part5']:.2f}s" + ) + timings += ensembledata.part_info["part5"] + print("".ljust(int(PLENGTH / 2), "-")) + print(f"{'All parts':20}: {'':{conflength}} {timings:.2f}s") + print("\nCENSO 
all done!") diff --git a/censo_qm/cfg.py b/censo_qm/cfg.py new file mode 100644 index 0000000..cde2e22 --- /dev/null +++ b/censo_qm/cfg.py @@ -0,0 +1,270 @@ +""" +Storing constants for the use in all CENSO modules. +Storing program paths --> still in transition +Storing censo_solvent_db solvent database across all solvation models (as fallback) +""" +import os + +__version__ = "1.0.0" +DESCR = f""" + ______________________________________________________________ + | | + | | + | CENSO - Commandline ENSO | + | v {__version__:<{19}} | + | energetic sorting of CREST Conformer Rotamer Ensembles | + | University of Bonn, MCTC | + | June 2020 | + | based on ENSO version 2.0.1 | + | F. Bohle and S. Grimme | + | | + |______________________________________________________________| + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +""" +global ENVIRON +ENVIRON = os.environ.copy() +CODING = "ISO-8859-1" +DIGILEN = 60 +PLENGTH = 100 +AU2J = 4.3597482e-18 # a.u.(hartree/mol) to J +KB = 1.3806485279e-23 # J/K +AU2KCAL = 627.50947428 +BOHR2ANG = 0.52917721067 + +# program paths: + +external_paths = {} +external_paths["orcapath"] = "" +external_paths["orcaversion"] = "" +external_paths["xtbpath"] = "" +external_paths["crestpath"] = "" +external_paths["cosmorssetup"] = "" +external_paths["dbpath"] = "" +external_paths["cosmothermversion"] = "" +external_paths["mpshiftpath"] = "" +external_paths["escfpath"] = "" +external_paths["cefinepath"] = "" + + +# censo solvent database to chose solvents across all available solvent models +censo_solvent_db = { + "acetone": { + "cosmors": ["propanone_c0", "propanone_c0"], + "dcosmors": ["propanone", "propanone"], + "xtb": ["acetone", "acetone"], + "cpcm": ["acetone", "acetone"], + "smd": ["ACETONE", "ACETONE"], + "DC": 20.7, + }, + "chcl3": { + "cosmors": ["chcl3_c0", "chcl3_c0"], + "dcosmors": ["chcl3", 
"chcl3"], + "xtb": ["chcl3", "chcl3"], + "cpcm": ["chloroform", "chloroform"], + "smd": ["CHLOROFORM", "CHLOROFORM"], + "DC": 4.8, + }, + "acetonitrile": { + "cosmors": ["acetonitrile_c0", "acetonitrile_c0"], + "dcosmors": ["acetonitrile", "acetonitrile"], + "xtb": ["acetonitrile", "acetonitrile"], + "cpcm": ["acetonitrile", "acetonitrile"], + "smd": ["ACETONITRILE", "ACETONITRILE"], + "DC": 36.6, + }, + "ch2cl2": { + "cosmors": ["ch2cl2_c0", "ch2cl2_c0"], + "dcosmors": [None, "chcl3"], + "xtb": ["ch2cl2", "ch2cl2"], + "cpcm": ["CH2Cl2", "CH2Cl2"], + "smd": ["DICHLOROMETHANE", "DICHLOROMETHANE"], + "DC": 9.1, + }, + "dmso": { + "cosmors": ["dimethylsulfoxide_c0", "dimethylsulfoxide_c0"], + "dcosmors": ["dimethylsulfoxide", "dimethylsulfoxide"], + "xtb": ["dmso", "dmso"], + "cpcm": ["DMSO", "DMSO"], + "smd": ["DIMETHYLSULFOXIDE", "DIMETHYLSULFOXIDE"], + "DC": 47.2, + }, + "h2o": { + "cosmors": ["h2o_c0", "h2o_c0"], + "dcosmors": ["h2o", "h2o"], + "xtb": ["h2o", "h2o"], + "cpcm": ["Water", "Water"], + "smd": ["WATER", "WATER"], + "DC": 80.1, + }, + "methanol": { + "cosmors": ["methanol_c0", "methanol_c0"], + "dcosmors": ["methanol", "methanol"], + "xtb": ["methanol", "methanol"], + "cpcm": ["Methanol", "Methanol"], + "smd": ["METHANOL", "METHANOL"], + "DC": 32.7, + }, + "thf": { + "cosmors": ["thf_c0", "thf_c0"], + "dcosmors": ["thf", "thf"], + "xtb": ["thf", "thf"], + "cpcm": ["THF", "THF"], + "smd": ["TETRAHYDROFURAN", "TETRAHYDROFURAN"], + "DC": 7.6, + }, + "toluene": { + "cosmors": ["toluene_c0", "toluene_c0"], + "dcosmors": ["toluene", "toluene"], + "xtb": ["toluene", "toluene"], + "cpcm": ["Toluene", "Toluene"], + "smd": ["TOLUENE", "TOLUENE"], + "DC": 2.4, + }, + "octanol": { + "cosmors": ["1-octanol_c0", "1-octanol_c0"], + "dcosmors": ["octanol", "octanol"], + "xtb": ["octanol", "octanol"], + "cpcm": ["Octanol", "Octanol"], + "smd": ["1-OCTANOL", "1-OCTANOL"], + "DC": 9.9, + }, + "woctanol": { + "cosmors": [None, "woctanol"], + "dcosmors": ["wet-otcanol", 
"wet-octanol"], + "xtb": ["woctanol", "woctanol"], + "cpcm": [None, "Octanol"], + "smd": [None, "1-OCTANOL"], + "DC": 8.1, + }, + "hexadecane": { + "cosmors": ["n-hexadecane_c0", "n-hexadecane_c0"], + "dcosmors": ["hexadecane", "hexadecane"], + "xtb": ["hexadecane", "hexadecane"], + "cpcm": [None, "Hexane"], + "smd": ["N-HEXADECANE", "N-HEXADECANE"], + "DC": 2.1, + }, + "dmf": { + "cosmors": ["dimethylformamide_c0", "dimethylformamide_c0"], + "dcosmors": [None, "dimethylsulfoxide"], + "xtb": ["dmf", "dmf"], + "cpcm": ["DMF", "DMF"], + "smd": ["N,N-DIMETHYLFORMAMIDE", "N,N-DIMETHYLFORMAMIDE"], + "DC": 38.3, + }, + "aniline": { + "cosmors": ["aniline_c0", "aniline_c0"], + "dcosmors": ["aniline", "aniline"], + "xtb": ["aniline", "aniline"], + "cpcm": [None, "Pyridine"], + "smd": ["ANILINE", "ANILINE"], + "DC": 6.9, + }, + "cyclohexane": { + "cosmors": ["cyclohexane_c0", "cyclohexane_c0"], + "dcosmors": ["cyclohexane", "cyclohexane"], + "xtb": [None, "hexane"], + "cpcm": ["Cyclohexane", "Cyclohexane"], + "smd": ["CYCLOHEXANE", "CYCLOHEXANE"], + "DC": 2.0, + }, + "ccl4": { + "cosmors": ["ccl4_c0", "ccl4_c0"], + "dcosmors": ["ccl4", "ccl4"], + "xtb": ["ccl4", "ccl4"], + "cpcm": ["CCl4", "CCl4"], + "smd": ["CARBON TETRACHLORIDE", "CARBON TETRACHLORIDE"], + "DC": 2.2, + }, + "diethylether": { + "cosmors": ["diethylether_c0", "diethylether_c0"], + "dcosmors": ["diethylether", "diethylether"], + "xtb": ["ether", "ether"], + "cpcm": [None, "THF"], + "smd": ["DIETHYL ETHER", "DIETHYL ETHER"], + "DC": 4.4, + }, + "ethanol": { + "cosmors": ["ethanol_c0", "ethanol_c0"], + "dcosmors": ["ethanol", "ethanol"], + "xtb": [None, "methanol"], + "cpcm": [None, "Methanol"], + "smd": ["ETHANOL", "ETHANOL"], + "DC": 24.6, + }, + "hexane": { + "cosmors": ["hexane_c0", "hexane_c0"], + "dcosmors": ["hexane", "hexane"], + "xtb": ["hexane", "hexane"], + "cpcm": ["Hexane", "Hexane"], + "smd": ["N-HEXANE", "N-HEXANE"], + "DC": 1.9, + }, + "nitromethane": { + "cosmors": ["nitromethane_c0", 
"nitromethane_c0"], + "dcosmors": ["nitromethane", "nitromethane"], + "xtb": ["nitromethane", "nitromethane"], + "cpcm": [None, "methanol"], + "smd": "", + "DC": 38.2, + }, + "benzaldehyde": { + "cosmors": ["benzaldehyde_c0", "benzaldehyde_c0"], + "dcosmors": [None, "propanone"], + "xtb": ["benzaldehyde", "benzaldehyde"], + "cpcm": [None, "Pyridine"], + "smd": ["BENZALDEHYDE", "BENZALDEHYDE"], + "DC": 18.2, + }, + "benzene": { + "cosmors": ["benzene_c0", "benzene_c0"], + "dcosmors": [None, "toluene"], + "xtb": ["benzene", "benzene"], + "cpcm": ["Benzene", "Benzene"], + "smd": ["BENZENE", "BENZENE"], + "DC": 2.3, + }, + "cs2": { + "cosmors": ["cs2_c0", "cs2_c0"], + "dcosmors": [None, "ccl4"], + "xtb": ["cs2", "cs2"], + "cpcm": [None, "CCl4"], + "smd": ["CARBON DISULFIDE", "CARBON DISULFIDE"], + "DC": 2.6, + }, + "dioxane": { + "cosmors": ["dioxane_c0", "dioxane_c0"], + "dcosmors": [None, "diethylether"], + "xtb": ["dioxane", "dioxane"], + "cpcm": [None, "Cyclohexane"], + "smd": ["1,4-DIOXANE", "1,4-DIOXANE"], + "DC": 2.2, + }, + "ethylacetate": { + "cosmors": ["ethylacetate_c0", "ethylacetate_c0"], + "dcosmors": [None, "diethylether"], + "xtb": ["ethylacetate", "ethylacetate"], + "cpcm": [None, "THF"], + "smd": ["ETHYL ETHANOATE", "ETHYL ETHANOATE"], + "DC": 5.9, + }, + "furan": { + "cosmors": ["furane_c0", "furane_c0"], + "dcosmors": [None, "diethylether"], + "xtb": ["furane", "furane"], + "cpcm": [None, "THF"], + "smd": [None, "THF"], + "DC": 3.0, + }, + "phenol": { + "cosmors": ["phenol_c0", "phenol_c0"], + "dcosmors": [None, "thf"], + "xtb": ["phenol", "phenol"], + "cpcm": [None, "THF"], + "smd": [None, "THIOPHENOL"], + "DC": 8.0, + }, +} diff --git a/censo_qm/cheapscreening.py b/censo_qm/cheapscreening.py new file mode 100644 index 0000000..23c3502 --- /dev/null +++ b/censo_qm/cheapscreening.py @@ -0,0 +1,534 @@ +""" +prescreening == part0, calculate cheap free energy on GFNn-xTB input geometry +The idea is to improve on the description of E with a very fast 
DFT method. +""" +import os +import sys +import math +from multiprocessing import JoinableQueue as Queue +from .cfg import PLENGTH, DIGILEN, AU2KCAL, CODING +from .parallel import run_in_parallel +from .orca_job import OrcaJob +from .tm_job import TmJob +from .utilities import ( + check_for_folder, + print_block, + new_folders, + last_folders, + ensemble2coord, + printout, + move_recursively, + write_trj, + check_tasks, + calc_std_dev, + spearman, + print, + calc_boltzmannweights, +) + + +def part0(config, conformers, ensembledata): + """ + Cheap prescreening of the ensemble, with single-points on combined ensemble + geometries. + Input: + - config [conifg_setup object] contains all settings + - conformers [list of molecule_data objects] each conformer is represented + - ensembledata -> instance for saving ensemble (not conf) related data + Return: + -> config + -> conformers + -> store_confs + """ + save_errors = [] + print("\n" + "".ljust(PLENGTH, "-")) + print("CRE CHEAP-PRESCREENING - PART0".center(PLENGTH, " ")) + print("".ljust(PLENGTH, "-") + "\n") + # print flags for part1 + info = [] + info.append(["prog", "program"]) + info.append(["func0", "functional for part0"]) + info.append(["basis0", "basis set for part0"]) + info.append(["part0_threshold", "threshold"]) + info.append(["nconf", "starting number of considered conformers"]) + + optionsexchange = {True: "on", False: "off"} + for item in info: + if item[0] == "justprint": + print(item[1:][0]) + else: + if item[0] == "printoption": + option = item[2] + else: + option = getattr(config, item[0]) + if option is True or option is False: + option = optionsexchange[option] + elif isinstance(option, list): + option = ", ".join(option) + print( + "{}: {:{digits}} {}".format( + item[1], "", option, digits=DIGILEN - len(item[1]) + ) + ) + print("") + # end print + + calculate = [] # has to be calculated in this run + prev_calculated = [] # was already calculated in a previous run + store_confs = [] # stores all 
confs which are sorted out! + + print("Calculating efficient gas-phase single-point energies:") + + # setup queues + q = Queue() + resultq = Queue() + + if config.prog == "tm": + job = TmJob + elif config.prog == "orca": + job = OrcaJob + + for conf in list(conformers): + conf = conformers.pop(conformers.index(conf)) + if conf.removed: + store_confs.append(conf) + print(f"CONF{conf.id} is removed as requested by the user.") + continue + if conf.id > config.nconf: + store_confs.append(conf) + continue + if conf.cheap_prescreening_sp_info["info"] == "not_calculated": + calculate.append(conf) + elif conf.cheap_prescreening_sp_info["info"] == "failed": + store_confs.append(conf) + print(f"Calculation of CONF{conf.id} failed in the previous run.") + elif conf.cheap_prescreening_sp_info["info"] == "calculated": + conf.job["success"] = True + prev_calculated.append(conf) + else: + print("ERROR: UNEXPECTED BEHAVIOUR") + if not calculate and not prev_calculated: + print("ERROR: No conformers left!") + if prev_calculated: + check_for_folder(config.cwd, [i.id for i in prev_calculated], config.func) + print("The efficient gas-phase single-point was calculated before for:") + print_block(["CONF" + str(i.id) for i in prev_calculated]) + pl = config.lenconfx + 4 + len(str("/" + config.func)) + + if config.solvent != "gas": + instruction = { + "jobtype": "alpb_gsolv", + "func": config.func0, + "basis": getattr( + config, + "basis0", + config.func_basis_default.get(config.func0, "def2-SV(P)"), + ), + "charge": config.charge, + "unpaired": config.unpaired, + "solvent": config.solvent, + "sm": config.sm_rrho, + "omp": config.omp, + "gfn_version": config.part0_gfnv, + "energy": 0.0, + "energy2": 0.0, + "success": False, + "xtb_driver_path": config.external_paths["xtbpath"], + } + + tmp_disp = "" + if config.prog == "tm": + instruction["prepinfo"] = ["clear", "-grid", "1", "-scfconv", "5", "DOGCP"] + if config.func0 == "b97-d": + instruction["prepinfo"].append("-zero") + tmp_disp = 
"D3(0)" + + elif config.prog == "orca": + instruction["progpath"] = config.external_paths["orcapath"] + instruction["prepinfo"] = ["low", "DOGCP"] + + instruction["method"], instruction["method2"], = config.get_method_name( + instruction["jobtype"], + func=instruction["func"], + basis=instruction["basis"], + sm=instruction["sm"], + solvent=instruction["solvent"], + prog=config.prog, + disp=tmp_disp, + gfn_version=instruction["gfn_version"], + ) + elif config.solvent == "gas": + instruction = { + "jobtype": "sp", + "func": config.func0, + "basis": getattr( + config, + "basis0", + config.func_basis_default.get(config.func0, "def2-SV(P)"), + ), + "charge": config.charge, + "unpaired": config.unpaired, + "solvent": "gas", + "sm": "gas-phase", + "omp": config.omp, + "energy": 0.0, + "energy2": 0.0, + "success": False, + } + + tmp_disp = "" + if config.prog == "tm": + instruction["prepinfo"] = ["clear", "-grid", "1", "-scfconv", "5", "DOGCP"] + if config.func0 == "b97-d": + instruction["prepinfo"].append("-zero") + tmp_disp = "D3(0)" + + elif config.prog == "orca": + instruction["progpath"] = config.external_paths["orcapath"] + instruction["prepinfo"] = ["low"] + + instruction["method"], instruction["method2"], = config.get_method_name( + instruction["jobtype"], + func=instruction["func"], + basis=instruction["basis"], + sm=instruction["sm"], + solvent=instruction["solvent"], + prog=config.prog, + disp=tmp_disp, + ) + + name = "efficient gas-phase single-point" + folder = "part0_sp" + check = {True: "was successful", False: "FAILED"} + if calculate: + print(f"The {name} is calculated for:") + print_block(["CONF" + str(i.id) for i in calculate]) + # create folders: + save_errors, store_confs, calculate = new_folders( + config.cwd, calculate, folder, save_errors, store_confs + ) + # write coord to folder + calculate, store_confs, save_errors = ensemble2coord( + config, folder, calculate, store_confs, save_errors + ) + + # parallel calculation: + calculate = 
run_in_parallel( + config, q, resultq, job, config.maxthreads, calculate, instruction, folder + ) + + for conf in list(calculate): + if instruction["jobtype"] == "alpb_gsolv": + line = ( + f"The {name} {check[conf.job['success']]}" + f" for {last_folders(conf.job['workdir'], 2):>{pl}}: " + f"E(DFT) = {conf.job['energy']:>.8f}" + f" Gsolv = {conf.job['energy2']:>.8f}" + ) + print(line) + if not conf.job["success"]: + save_errors.append(line) + conf.cheap_prescreening_sp_info["info"] = "failed" + conf.cheap_prescreening_sp_info["method"] = conf.job["method"] + conf.cheap_prescreening_gsolv_info["info"] = "failed" + conf.cheap_prescreening_gsolv_info["method"] = conf.job["method"] + store_confs.append(calculate.pop(calculate.index(conf))) + else: + conf.cheap_prescreening_sp_info["energy"] = conf.job["energy"] + conf.cheap_prescreening_sp_info["info"] = "calculated" + conf.cheap_prescreening_sp_info["method"] = conf.job["method"] + conf.cheap_prescreening_gsolv_info["energy"] = conf.job["energy2"] + conf.cheap_prescreening_gsolv_info["info"] = "calculated" + conf.cheap_prescreening_gsolv_info["method"] = conf.job["method2"] + conf.cheap_prescreening_gsolv_info["gas-energy"] = conf.job[ + "energy_xtb_gas" + ] + conf.cheap_prescreening_gsolv_info["solv-energy"] = conf.job[ + "energy_xtb_solv" + ] + elif instruction["jobtype"] == "sp": + line = ( + f"The {name} calculation {check[conf.job['success']]}" + f" for {last_folders(conf.job['workdir'], 2):>{pl}}: " + f"E(DFT) = {conf.job['energy']:>.8f}" + ) + print(line) + if not conf.job["success"]: + save_errors.append(line) + conf.cheap_prescreening_sp_info["info"] = "failed" + conf.cheap_prescreening_sp_info["method"] = conf.job["method"] + store_confs.append(calculate.pop(calculate.index(conf))) + else: + conf.cheap_prescreening_sp_info["energy"] = conf.job["energy"] + conf.cheap_prescreening_sp_info["info"] = "calculated" + conf.cheap_prescreening_sp_info["method"] = conf.job["method"] + else: + print( + f'UNEXPECTED 
BEHAVIOUR: {conf.job["success"]} {conf.job["jobtype"]}' + ) + # save current data to jsonfile + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + check_tasks(calculate, config.check) + else: + print("No conformers are considered additionally.") + if prev_calculated: + # adding conformers calculated before: + for conf in list(prev_calculated): + conf.job["workdir"] = os.path.normpath( + os.path.join(config.cwd, "CONF" + str(conf.id), config.func) + ) + if instruction["jobtype"] == "alpb_gsolv": + print( + f"The {name} {check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 2):>{pl}}: " + f"E(DFT) = {conf.cheap_prescreening_sp_info['energy']:>.8f}" + f" Gsolv = {conf.cheap_prescreening_gsolv_info['energy']:>.8f}" + ) + elif instruction["jobtype"] == "sp": + print( + f"The {name} calculation {check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 2):>{pl}}: " + f"E(DFT) = {conf.cheap_prescreening_sp_info['energy']:>.8f}" + ) + calculate.append(prev_calculated.pop(prev_calculated.index(conf))) + for conf in calculate: + conf.reset_job_info() + if not calculate: + print("ERROR: No conformers left!") + print("Going to exit!") + sys.exit(1) + + # *************************************************************************** + # sorting by E + # (remove high lying conformers above part0_threshold) + print("\n" + "".ljust(int(PLENGTH), "-")) + print( + "Removing high lying conformers by improved energy description".center( + int(PLENGTH), " " + ) + ) + print("".ljust(int(PLENGTH), "-") + "\n") + + if config.solvent != "gas": + solvation = "cheap_prescreening_gsolv_info" + else: + solvation = None + rrho = None + energy = "cheap_prescreening_sp_info" + for conf in calculate: + conf.calc_free_energy(e=energy, solv=solvation, rrho=rrho) + try: + minfree = 
min([i.free_energy for i in calculate if i is not None]) + except ValueError: + raise + for conf in calculate: + if conf.free_energy == minfree: + ensembledata.bestconf["part0"] = conf.id + lowest_e = conf.cheap_prescreening_sp_info["energy"] + lowest_gsolv = conf.cheap_prescreening_gsolv_info["energy"] + if config.solvent != "gas": + try: + minfree_gfnx = min( + [ + i.cheap_prescreening_gsolv_info["solv-energy"] + for i in calculate + if i is not None + ] + ) + except ValueError: + raise + for conf in calculate: + conf.rel_free_energy = (conf.free_energy - minfree) * AU2KCAL + if config.solvent != "gas": + conf.tmp_rel_xtb = ( + conf.cheap_prescreening_gsolv_info["solv-energy"] - minfree_gfnx + ) * AU2KCAL + conf.tmp_rel_e = ( + conf.cheap_prescreening_sp_info["energy"] - lowest_e + ) * AU2KCAL + conf.tmp_rel_gsolv = ( + conf.cheap_prescreening_gsolv_info["energy"] - lowest_gsolv + ) * AU2KCAL + + try: + maxreldft = max([i.rel_free_energy for i in calculate if i is not None]) + except ValueError: + print("ERROR: No conformer left or Error in maxreldft!") + # print sorting + columncall = [ + lambda conf: "CONF" + str(getattr(conf, "id")), + lambda conf: getattr(conf, "cheap_prescreening_gsolv_info")["solv-energy"], + lambda conf: getattr(conf, "tmp_rel_xtb"), + lambda conf: getattr(conf, "cheap_prescreening_sp_info")["energy"], + lambda conf: getattr(conf, "cheap_prescreening_gsolv_info")["energy"], + lambda conf: getattr(conf, "free_energy"), + lambda conf: getattr(conf, "tmp_rel_e"), + lambda conf: getattr(conf, "tmp_rel_gsolv"), + lambda conf: getattr(conf, "rel_free_energy"), + ] + columnheader = [ + "CONF#", + f"G [Eh]", + f"ΔG [kcal/mol]", + "E [Eh]", + "Gsolv [Eh]", + "Gtot", + "ΔE(DFT)", + "ΔGsolv", + "ΔGtot", + ] + columndescription = [ + "", + "", + "", + "", + "[Eh]", + "[Eh]", + "[kcal/mol]", + "[kcal/mol]", + "[kcal/mol]", + ] + columnformat = [ + "", + (12, 7), + (5, 2), + (12, 7), + (12, 7), + (12, 7), + (5, 2), + (5, 2), + (5, 2), + ] + 
columndescription[1] = f"{config.part0_gfnv.upper()}-xTB[{config.sm_rrho.upper()}]" + columndescription[2] = f"{config.part0_gfnv.upper()}-xTB[{config.sm_rrho.upper()}]" + columndescription[3] = instruction["method"] + columndescription[4] = instruction["method2"] + if config.solvent == "gas": + columncall[1] = lambda conf: getattr(conf, "xtb_energy") + columncall[2] = lambda conf: getattr(conf, "rel_xtb_energy") + columnheader[1] = "G(GFNn-xTB)" + columnheader[2] = "ΔG(GFNn-xTB)" + columndescription[1] = "[Eh]" + columndescription[2] = "[kcal/mol]" + columndescription[4] = "gas-phase" + + calculate.sort(key=lambda x: int(x.id)) + printout( + os.path.join(config.cwd, "part0.dat"), + columncall, + columnheader, + columndescription, + columnformat, + calculate, + minfree, + ) + print("".ljust(int(PLENGTH), "-")) + # -------------------------------------------------------------------------- + + # write to enso.json + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + # --------------------------------------------------------------------------- + # sorting + if maxreldft > config.part0_threshold: + print("\n" + "".ljust(int(PLENGTH / 2), "-")) + print("Conformers considered further".center(int(PLENGTH / 2), " ")) + print("".ljust(int(PLENGTH / 2), "-") + "\n") + for conf in list(calculate): + if conf.rel_free_energy <= config.part0_threshold: + conf.part_info["part0"] = "passed" + else: + conf.part_info["part0"] = "refused" + store_confs.append(calculate.pop(calculate.index(conf))) + if calculate: + print( + f"These conformers are below the {config.part0_threshold:.3f} " + f"kcal/mol threshold.\n" + ) + print_block(["CONF" + str(i.id) for i in calculate]) + else: + print("Error: There are no more conformers left!") + else: + for conf in list(calculate): + conf.part_info["part0"] = "passed" + 
print( + "\nAll relative (free) energies are below the initial threshold " + f"of {config.part0_threshold} kcal/mol.\nAll conformers are " + "considered further." + ) + ensembledata.nconfs_per_part["part0"] = len(calculate) + + ################################################################################ + # calculate average G correction + print( + "\nCalculating Boltzmann averaged (free) energy of ensemble on input " + "geometries (not DFT optimized)!\n" + ) + # calculate Boltzmannweights + print(f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} " f"{'avG(T) /a.u.':>14} ") + print("".ljust(int(PLENGTH), "-")) + + calculate = calc_boltzmannweights(calculate, "free_energy", config.temperature) + avG = 0.0 + avE = 0.0 + for conf in calculate: + avG += conf.bm_weight * conf.free_energy + avE += conf.bm_weight * conf.cheap_prescreening_sp_info["energy"] + # printout: + print(f"{config.temperature:^15} {avE:>14.7f} {avG:>14.7f} " " <<==part0==") + print("".ljust(int(PLENGTH), "-")) + print("") + ################################################################################ + + # reset + for conf in calculate: + conf.free_energy = 0.0 + conf.rel_free_energy = None + conf.bm_weight = 0.0 + conf.tmp_rel_xtb = 0.0 + conf.tmp_rel_e = 0.0 + conf.tmp_rel_gsolv = 0.0 + conf.reset_job_info() + + # write to enso.json + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + + if save_errors: + print( + "***---------------------------------------------------------***", + file=sys.stderr, + ) + print( + "Printing most relevant errors again, just for user convenience:", + file=sys.stderr, + ) + for _ in list(save_errors): + print(save_errors.pop(), file=sys.stderr) + print( + "***---------------------------------------------------------***", + file=sys.stderr, + ) + + tmp = int((PLENGTH - len("END of Part0")) / 
class MoleculeData:
    """
    Store all thermodynamic properties of one conformer (e.g. energy, gsolv,
    grrho) together with the bookkeeping of what has been calculated.
    """

    # *_info dictionaries which archive older results under "prev_methods"
    _PREV_METHOD_STORES = (
        "prescreening_grrho_info",
        "lowlevel_grrho_info",
        "lowlevel_hrrho_info",
        "highlevel_grrho_info",
        "highlevel_hrrho_info",
        "prescreening_gsolv_info",
        "lowlevel_gsolv_info",
        "lowlevel_gsolv_compare_info",
        "highlevel_gsolv_info",
        "optical_rotation_info",
        "prescreening_sp_info",
        "lowlevel_sp_info",
        "highlevel_sp_info",
        "cheap_prescreening_sp_info",
        "cheap_prescreening_gsolv_info",
    )
    # *_info dictionaries whose missing "energy" is preset to 0.0
    _ENERGY_STORES = (
        "cheap_prescreening_sp_info",
        "cheap_prescreening_gsolv_info",
        "prescreening_sp_info",
        "lowlevel_sp_info",
        "highlevel_sp_info",
        "prescreening_grrho_info",
        "lowlevel_grrho_info",
        "lowlevel_hrrho_info",
        "prescreening_gsolv_info",
        "lowlevel_gsolv_info",
        "lowlevel_gsolv_compare_info",
        "highlevel_gsolv_info",
        "highlevel_grrho_info",
        "highlevel_hrrho_info",
    )
    # *_info dictionaries whose "range" keys are converted to float
    _RANGE_STORES = (
        "lowlevel_grrho_info",
        "lowlevel_hrrho_info",
        "lowlevel_gsolv_info",
        "lowlevel_gsolv_compare_info",
        "highlevel_gsolv_info",
        "highlevel_grrho_info",
        "highlevel_hrrho_info",
        "optical_rotation_info",
    )

    @staticmethod
    def _fresh_defaults():
        """
        Return newly built default values for every mutable argument of
        __init__, so that no two instances ever share a dictionary or list.
        """

        def single_point():
            return {
                "energy": None,
                "info": "not_calculated",
                "method": None,
                "prev_methods": None,
            }

        def rrho_range():
            return {
                "energy": None,
                "range": None,
                "info": "not_calculated",
                "method": None,
                "rmsd": None,
                "prev_methods": None,
            }

        def gsolv_range():
            return {
                "energy": None,
                "gas-energy": None,
                "range": None,
                "info": "not_calculated",
                "method": None,
                "prev_methods": None,
            }

        def nmr():
            return {
                "info": "not_calculated",
                "method": None,
                "h_active": False,
                "c_active": False,
                "f_active": False,
                "si_active": False,
                "p_active": False,
            }

        return {
            "temperature_info": {"temperature": 298.15, "range": None},
            "cheap_prescreening_sp_info": single_point(),
            "cheap_prescreening_gsolv_info": {
                "energy": None,
                "gas-energy": None,
                "solv-energy": None,
                "info": "not_calculated",
                "method": None,
                "prev_methods": None,
            },
            "prescreening_sp_info": single_point(),
            "lowlevel_sp_info": single_point(),
            "highlevel_sp_info": single_point(),
            "prescreening_grrho_info": {
                "energy": None,
                "info": "not_calculated",
                "method": None,
                "fuzzythr": 0.0,
                "rmsd": None,
                "prev_methods": None,
            },
            "lowlevel_grrho_info": rrho_range(),
            "lowlevel_hrrho_info": rrho_range(),
            "highlevel_grrho_info": rrho_range(),
            "highlevel_hrrho_info": rrho_range(),
            "prescreening_gsolv_info": {
                "energy": None,
                "gas-energy": None,
                "info": "not_calculated",
                "method": None,
                "prev_methods": None,
            },
            "lowlevel_gsolv_info": gsolv_range(),
            "lowlevel_gsolv_compare_info": {
                "energy": None,
                "range": None,
                "info": "not_calculated",
                "method": None,
                "prev_methods": None,
                "std_dev": None,
            },
            "highlevel_gsolv_info": gsolv_range(),
            "optimization_info": {
                "energy": None,
                "convergence": "not_converged",
                "cregen_sort": "pass",  # pass and removed
                "info": "not_calculated",
                "cycles": 0,
                "ecyc": [],
                "decyc": [],
                "energy_rrho": 0.0,
                "method_rrho": None,
                "info_rrho": "not_calculated",
            },
            "nmr_coupling_info": nmr(),
            "nmr_shielding_info": nmr(),
            "part_info": {
                "part0": None,
                "part1": None,
                "part2": None,
                "part3": None,
                "part4": None,
                "part5": None,
            },
            "comment": [],
            "optical_rotation_info": {
                "range": None,
                "info": "not_calculated",
                "method": None,
                "prev_methods": None,
            },
        }

    def __init__(
        self,
        rank,
        chrg=0,
        uhf=0,
        xtb_energy=None,
        xtb_energy_unbiased=None,
        xtb_free_energy=None,
        rel_xtb_energy=None,
        rel_xtb_free_energy=None,
        sym="c1",
        gi=1.0,
        removed=False,
        free_energy=0.0,
        temperature_info=None,
        cheap_prescreening_sp_info=None,
        cheap_prescreening_gsolv_info=None,
        prescreening_sp_info=None,
        lowlevel_sp_info=None,
        highlevel_sp_info=None,
        prescreening_grrho_info=None,
        lowlevel_grrho_info=None,
        lowlevel_hrrho_info=None,
        highlevel_grrho_info=None,
        highlevel_hrrho_info=None,
        prescreening_gsolv_info=None,
        lowlevel_gsolv_info=None,
        lowlevel_gsolv_compare_info=None,
        highlevel_gsolv_info=None,
        optimization_info=None,
        nmr_coupling_info=None,
        nmr_shielding_info=None,
        part_info=None,
        comment=None,
        optical_rotation_info=None,
    ):
        """
        molecule_data: Creates a molecule instance where all thermodynamic data
        concerning the molecule is stored.

        Every ``*_info`` argument (and ``comment``) left at ``None`` is
        replaced by a freshly created default (see ``_fresh_defaults``), so
        instances never share state. A dictionary supplied by the caller is
        stored by reference and completed in place (missing "energy",
        "range", "prev_methods" entries are preset; "range" keys are
        converted to float).

        Input:
         - rank [int] --> rank of the molecule in the input ensemble e.g. CONF(X)
         - chrg [int] --> charge of the molecule
         - uhf [int] --> number of unpaired electrons
         - xtb_energy [float] a.u. --> initial energy taken from the input ensemble
         - rel_xtb_energy [float] kcal/mol --> relative initial energy taken
           from the input ensemble
         - sym [string] --> schoenflies notation of pointgroup
         - gi [float] --> degeneracy of conformer
         - temperature_info [dict] --> 'temperature' [float] for the general
           evaluation and 'range' [list(float)] for multiple temperatures
         - *_sp_info / *_grrho_info / *_hrrho_info / *_gsolv_info [dict] -->
           energy contributions of the respective program part
         - part_info [dict] --> partx : passed/refused/not_calculated

        *_info = {'info': calculated/not_calculated/failed/skipped/removed/prep-failed}

        Raises:
            TypeError / ValueError if an argument has an unexpected type.
        """
        defaults = self._fresh_defaults()
        given = {
            "temperature_info": temperature_info,
            "cheap_prescreening_sp_info": cheap_prescreening_sp_info,
            "cheap_prescreening_gsolv_info": cheap_prescreening_gsolv_info,
            "prescreening_sp_info": prescreening_sp_info,
            "lowlevel_sp_info": lowlevel_sp_info,
            "highlevel_sp_info": highlevel_sp_info,
            "prescreening_grrho_info": prescreening_grrho_info,
            "lowlevel_grrho_info": lowlevel_grrho_info,
            "lowlevel_hrrho_info": lowlevel_hrrho_info,
            "highlevel_grrho_info": highlevel_grrho_info,
            "highlevel_hrrho_info": highlevel_hrrho_info,
            "prescreening_gsolv_info": prescreening_gsolv_info,
            "lowlevel_gsolv_info": lowlevel_gsolv_info,
            "lowlevel_gsolv_compare_info": lowlevel_gsolv_compare_info,
            "highlevel_gsolv_info": highlevel_gsolv_info,
            "optimization_info": optimization_info,
            "nmr_coupling_info": nmr_coupling_info,
            "nmr_shielding_info": nmr_shielding_info,
            "part_info": part_info,
            "comment": comment,
            "optical_rotation_info": optical_rotation_info,
        }
        # caller supplied objects are kept by reference, None --> fresh default
        info = {
            name: defaults[name] if value is None else value
            for name, value in given.items()
        }
        temperature_info = info["temperature_info"]
        part_info = info["part_info"]

        # check default arguments:
        for name in self._PREV_METHOD_STORES:
            if info[name].get("prev_methods", None) is None:
                info[name]["prev_methods"] = {}
        if temperature_info.get("range") is None:
            temperature_info["range"] = []
        if xtb_energy is None:
            xtb_energy = 100.0
        if xtb_energy_unbiased is None:
            xtb_energy_unbiased = 100.0
        if xtb_free_energy is None:
            xtb_free_energy = 100.0
        if rel_xtb_energy is None:
            rel_xtb_energy = 100.0
        if rel_xtb_free_energy is None:
            rel_xtb_free_energy = 100.0
        for name in self._ENERGY_STORES:
            if info[name].get("energy") is None:
                info[name]["energy"] = 0.0
        # needs "prev_methods" to be present, hence after the loop above
        for name in self._RANGE_STORES:
            self._initialize(info[name])
        for key in part_info.keys():
            if part_info.get(key) is None:
                part_info[key] = "not_calculated"

        def energy_is_float(name):
            return isinstance(info[name].get("energy", None), float)

        def range_values_are_float(name):
            return all(
                isinstance(value, float) for value in info[name].get("range").values()
            )

        # exceptions:
        if not isinstance(rank, int):
            raise TypeError(
                "Please input an integer. The id is the rank of the "
                "molecule in the input ensemble!"
            )
        if not isinstance(temperature_info.get("temperature", None), float):
            raise TypeError(
                "Please input an float. Thermodynamic properties are"
                "evaluated at this temperature!"
            )
        if not isinstance(temperature_info.get("range", None), list):
            raise ValueError("Please provide a list with temperatures!")
        elif any(not isinstance(i, float) for i in temperature_info.get("range")):
            raise TypeError("Please provide temperatures as float!")
        if not isinstance(chrg, int):
            raise TypeError("Please provide charge as integer!")
        if not isinstance(uhf, int):
            raise TypeError(
                "Please provide number of unpaired electrons as " "integer!"
            )
        if not isinstance(xtb_energy, float):
            raise TypeError("Please provide energy from input ensemble as float!")
        if not isinstance(rel_xtb_energy, float):
            raise TypeError(
                "Please provide rel. energy from input ensemble as " "float!"
            )
        if not isinstance(sym, str):
            raise TypeError("Please provide symmetry as string.")
        if not isinstance(gi, float):
            try:
                gi = float(gi)
            except (TypeError, ValueError) as error:
                # a bare string can not be raised; use a real exception
                raise TypeError("Please provide gi as float!") from error
        if not energy_is_float("prescreening_sp_info"):
            raise TypeError("Please provide preescreening sinlge point as float!")
        if not energy_is_float("lowlevel_sp_info"):
            raise TypeError("Please provide low level sinlge point as float!")
        if not energy_is_float("highlevel_sp_info"):
            raise TypeError("Please provide high level sinlge point as float!")
        if not energy_is_float("prescreening_grrho_info"):
            raise TypeError("Please provide G_RRHO as float!")
        if not energy_is_float("lowlevel_grrho_info"):
            raise TypeError("Please provide G_RRHO as float!")
        if not isinstance(info["lowlevel_grrho_info"]["range"], dict):
            raise TypeError("Please input a dict with Grrho values!")
        if not energy_is_float("lowlevel_hrrho_info"):
            raise TypeError("Please provide H_RRHO as float!")
        if not range_values_are_float("lowlevel_grrho_info"):
            raise TypeError("Please provide Grrho values as float!")
        if not isinstance(info["lowlevel_hrrho_info"]["range"], dict):
            raise TypeError("Please input a dict with Hrrho values!")
        if not range_values_are_float("lowlevel_hrrho_info"):
            raise TypeError("Please provide Hrrho values as float!")
        if not energy_is_float("prescreening_gsolv_info"):
            raise TypeError("Please provide Gsolv as float!")
        if not energy_is_float("lowlevel_gsolv_info"):
            raise TypeError("Please provide Gsolv as float!")
        if not isinstance(info["lowlevel_gsolv_info"].get("range"), dict):
            raise TypeError("Please input a dict with Gsolv values!")
        if not range_values_are_float("lowlevel_gsolv_info"):
            raise TypeError("Please provide Gsolv values as float!")
        if not energy_is_float("highlevel_gsolv_info"):
            raise TypeError("Please provide Gsolv as float!")
        if not isinstance(info["highlevel_gsolv_info"].get("range", None), dict):
            raise TypeError("Please input a dict with Gsolv values!")
        # check the stored values (not the temperature keys), as for lowlevel
        if not range_values_are_float("highlevel_gsolv_info"):
            raise TypeError("Please provide Gsolv values as float!")
        if not isinstance(removed, bool):
            raise TypeError("Please provide removed with boolean true/false.")
        if any(not isinstance(i, str) for i in part_info.values()):
            raise TypeError("Please provide part_info settings as str!")

        # assignment (the order defines the key order of provide_runinfo):
        self.id = rank  # this is the rank from the input ensemble
        self.temperature_info = temperature_info  # temperature for general evaluation
        self.chrg = chrg
        self.uhf = uhf
        self.xtb_energy = xtb_energy
        self.xtb_energy_unbiased = xtb_energy_unbiased
        self.xtb_free_energy = xtb_free_energy
        self.rel_xtb_energy = rel_xtb_energy
        self.rel_xtb_free_energy = rel_xtb_free_energy
        self.sym = sym
        self.gi = gi
        self.cheap_prescreening_gsolv_info = info["cheap_prescreening_gsolv_info"]
        self.cheap_prescreening_sp_info = info["cheap_prescreening_sp_info"]
        self.prescreening_sp_info = info["prescreening_sp_info"]
        self.lowlevel_sp_info = info["lowlevel_sp_info"]
        self.highlevel_sp_info = info["highlevel_sp_info"]
        self.prescreening_grrho_info = info["prescreening_grrho_info"]
        self.lowlevel_grrho_info = info["lowlevel_grrho_info"]
        self.lowlevel_hrrho_info = info["lowlevel_hrrho_info"]
        self.highlevel_grrho_info = info["highlevel_grrho_info"]
        self.highlevel_hrrho_info = info["highlevel_hrrho_info"]
        self.prescreening_gsolv_info = info["prescreening_gsolv_info"]
        self.lowlevel_gsolv_info = info["lowlevel_gsolv_info"]
        self.lowlevel_gsolv_compare_info = info["lowlevel_gsolv_compare_info"]
        self.highlevel_gsolv_info = info["highlevel_gsolv_info"]
        self.optimization_info = info["optimization_info"]
        self.nmr_coupling_info = info["nmr_coupling_info"]
        self.nmr_shielding_info = info["nmr_shielding_info"]
        self.removed = removed
        self.free_energy = free_energy
        self.part_info = part_info
        self.comment = info["comment"]
        self.optical_rotation_info = info["optical_rotation_info"]

    def _initialize(self, attr=None):
        """
        json saves keys as string. Convert the keys of attr["range"] and of
        every attr["prev_methods"][method]["range"] to float (in place).
        A missing range is replaced by an empty dictionary.
        """
        if attr is None:
            return
        if attr.get("range") is None:
            attr["range"] = {}
        elif isinstance(attr["range"], dict):
            attr["range"] = {float(key): value for key, value in attr["range"].items()}
        for stored in attr["prev_methods"].values():
            if isinstance(stored.get("range"), dict):
                stored["range"] = {
                    float(key): value for key, value in stored["range"].items()
                }
            else:
                stored["range"] = {}

    def reset_range_info(self, trange=None):
        """
        Reset all dictionaries concerned with a temperature range and
        set info to not calculated. (This is needed if the temperature range was
        not calculated in a previous run and is requested in a current run).
        trange -> list with temperatures
        """
        attributes = [
            "lowlevel_grrho_info",
            "lowlevel_hrrho_info",
            "lowlevel_gsolv_info",
            "highlevel_gsolv_info",
        ]
        # reset only if not all temperatures are found which are needed in trange
        for data in attributes:
            store = getattr(self, data)
            reset = trange is not None and any(
                store["range"].get(temp, None) is None for temp in trange
            )
            if reset:
                store["info"] = "not_calculated"
                # keep only value at "normal" temperature
                store["range"] = {self.temperature_info["temperature"]: store["energy"]}

    def save_prev(self, attr, method):
        """
        Save all information of the data currently stored in self.<attr>
        under self.<attr>['prev_methods'][method]. Nothing is saved if the
        data is flagged as 'not_calculated'.
        """
        current = getattr(self, attr)
        if current["info"] == "not_calculated":
            return
        # a pristine molecule defines which keys belong to attr
        attributes = vars(MoleculeData(0)).get(attr)
        current["prev_methods"].update(
            {method: {key: current.get(key) for key in attributes if key != "prev_methods"}}
        )

    def load_prev(self, attr, method, saveto=None):
        """
        load dictionary with all information from
        previously calculated data,
        if not previously calculated load presets
        self --> conf object
        attr --> e.g. lowlevel_sp_info
        method --> method identifier e.g. func/basis[sm]
        saveto --> optional if desired to save data somewhere else
                   e.g. highlevel_sp_info
        """
        attributes = vars(MoleculeData(0)).get(attr)
        stored = getattr(self, attr)["prev_methods"].get(method, None)
        if stored is not None:
            # calculated previously
            tmp = {key: stored.get(key) for key in attributes if key != "prev_methods"}
            if saveto is not None:
                attr = saveto
            getattr(self, attr).update(tmp)
        else:
            # if not calculated previously reset
            for key, value in attributes.items():
                if key != "prev_methods":
                    getattr(self, attr)[key] = value

    def provide_runinfo(self):
        """
        Return an OrderedDict with all molecule data (attribute name -> value)
        in the attribute order of a pristine MoleculeData instance.
        """
        return OrderedDict((key, getattr(self, key)) for key in vars(MoleculeData(0)))

    def calc_free_energy(self, e=None, solv=None, rrho=None, t=None, out=False):
        """
        Calculate free energy for molecule either at normal temperature,
        or if the temperature is not None from the range of temperatures.
        if out=False free energy is written to self.free_energy
        if out=True free energy is simply returned

        e, solv, rrho --> names of the attributes holding the contributions
        If a contribution can not be evaluated, the error is printed and
        self.free_energy is set to None (out=False) or the sum of the
        contributions collected so far is returned (out=True).
        """
        f = 0.0
        try:
            if e is not None:
                if e in ("xtb_energy", "xtb_energy_unbiased"):
                    f += getattr(self, e, 0.0)
                elif t is None:
                    f += getattr(self, e, {"energy": 0.0})["energy"]
                else:
                    f += getattr(self, e)["energy"]
            for contribution in (solv, rrho):
                if contribution is None:
                    continue
                if t is None:
                    f += getattr(self, contribution, {"energy": 0.0})["energy"]
                else:
                    f += getattr(self, contribution)["range"].get(t, 0.0)
        except Exception as error:
            if t is None:
                print("ERROR", error)
            else:
                print("ERROR in _calc_free_energy: ", error)
            if not out:
                self.free_energy = None
                return None
            return f
        if not out:
            self.free_energy = f
            return None
        return f
class EnsembleData:
    """
    Container for data which concerns the entire ensemble (in contrast to
    the data of a single conformer).
    """

    def __init__(
        self,
        id="ensemble_info",
        filename=None,
        part_info=None,
        avGcorrection=None,
        comment=None,
        bestconf=None,
        nconfs_per_part=None,
    ):
        """
        ensemble_data: Creates an object where data
        concerning the entire ensemble is stored.

        Every dictionary/list argument left at None is replaced by a newly
        created default, so that instances never share (and overwrite) each
        other's bookkeeping.

        Input:
            id --> identifier of the entry
            filename = e.g. crest_conformers.xyz
            part_info --> time passed to calculate part
            avGcorrection --> information of higher lying conformers
            comment --> list of comments
            bestconf --> id of best conf per part
            nconfs_per_part --> how many confs have been evaluated in each part
        """
        if part_info is None:
            part_info = {
                "part0": None,
                "part1_firstsort": None,
                "part1": None,
                "part2_opt": None,
                "part2": None,
                "part3": None,
            }
        if avGcorrection is None:
            avGcorrection = {}
        if comment is None:
            comment = []
        if bestconf is None:
            bestconf = {"part0": None, "part1": None, "part2": None, "part3": None}
        if nconfs_per_part is None:
            nconfs_per_part = {
                "starting": None,
                "part0": None,
                "part1_firstsort": None,
                "part1": None,
                "part2_opt": None,
                "part2": None,
                "part3": None,
            }
        self.id = id
        self.filename = filename
        self.part_info = part_info
        self.avGcorrection = avGcorrection
        self.comment = comment
        self.bestconf = bestconf
        self.nconfs_per_part = nconfs_per_part
crest_conformers.xyz ", + ) + group1.add_argument( + "-nc", + "--nconf", + dest="nconf", + type=int, + action="store", + required=False, + metavar="", + help="Number of conformers which are going to be considered (max number " + "of conformers are all conformers from the input file).", + ) + group1.add_argument( + "-chrg", + "--charge", + dest="charge", + action="store", + required=False, + metavar="", + help="Charge of the investigated molecule.", + ) + group1.add_argument( + "-u", + "--unpaired", + dest="unpaired", + action="store", + required=False, + type=int, + metavar="", + help="Integer number of unpaired electrons of the investigated molecule.", + ) + group1.add_argument( + "-T", + "--temperature", + dest="temperature", + action="store", + required=False, + metavar="", + help="Temperature in Kelvin for thermostatistical evaluation.", + ) + group1.add_argument( + "-multitemp", + "--multitemp", + dest="multitemp", + choices=["on", "off"], + required=False, + metavar="", + help="Needs to be turned on if a temperature range should be evaluated" + " (flag trange). Options for multitemp are: ['on' or 'off'].", + ) + group1.add_argument( + "-trange", + "--trange", + # default=[273.15, 378.15, 5], + dest="trange", + nargs=3, + required=False, + metavar=("start", "end", "step"), + type=float, + help="specify a temperature range [start, end, step] e.g.: 250.0 300.0 10.0" + " resulting in [250.0, 260.0, 270.0, 280.0, 290.0].", + ) + group1.add_argument( + "-bhess", + "--bhess", + dest="bhess", + choices=["on", "off"], + action="store", + required=False, + metavar="", + help="Applies structure constraint to input/DFT geometry for mRRHO calcuation." + "Options are: ['on' or 'off'].", + ) + group1.add_argument( + "-consider_sym", + "---consider_sym", + dest="-consider_sym", + choices=["on", "off"], + action="store", + required=False, + metavar="", + help="Consider symmetry in mRRHO calcuation (based on desy xtb threshold)." 
+ "Options are: ['on' or 'off'].", + ) + group1.add_argument( + "-rmsdbias", + "--rmsdbias", + dest="rmsdbias", + choices=["on", "off"], + action="store", + required=False, + metavar="", + help="Applies constraint to rmsdpot.xyz to be consistent to CREST." + "Options are: ['on' or 'off'].", + ) + group1.add_argument( + "-sm_rrho", + "--sm_rrho", + dest="sm_rrho", + choices=["gbsa", "alpb"], + action="store", + required=False, + metavar="", + help="Solvation model used in xTB GmRRHO calculation. Applied if not in gas-phase. " + "Options are 'gbsa' or 'alpb'.", + ) + group1.add_argument( + "-evaluate_rrho", + "--evaluate_rrho", + dest="evaluate_rrho", + action="store", + choices=["on", "off"], + required=False, + metavar="", + help="Evaluate mRRHO contribution. Options: on or off.", + ) + group1.add_argument( + "-func", + "--functional", + dest="func", + choices=options.value_options["func"], + action="store", + required=False, + metavar="", + help="Functional for geometry optimization (used in part2) and " + "single-points in part1", + ) + group1.add_argument( + "-basis", + "--basis", + dest="basis", + action="store", + required=False, + metavar="", + help="Basis set employed together with the functional (func) for the " + "low level single point in part1 und optimization in part2.", + ) + group1.add_argument( + "-checkinput", + "--checkinput", + dest="checkinput", + action="store_true", + required=False, + help="Option to check if all necessary information for the ENSO " + "calculation are provided and check if certain setting combinations " + "make sence. Option to choose from : ['on' or 'off']", + ) + group1.add_argument( + "-solvent", + "--solvent", + dest="solvent", + choices=options.value_options["solvent"], + metavar="", + action="store", + required=False, + help="Solvent the molecule is solvated in, available solvents " + "are: {}. 
They can be extended in the " + "file ~/.censo_assets/censo_solvents.json .".format( + options.value_options["solvent"] + ), + ) + group1.add_argument( + "-prog", + "--prog", + choices=options.value_options["prog"], + dest="prog", + required=False, + metavar="", + help="QM-program used in part1 and part2 either 'orca' or 'tm'.", + ) + group1.add_argument( + "-prog_rrho", + "--prog_rrho", + choices=options.value_options["prog_rrho"], + dest="prog_rrho", + required=False, + metavar="", + help="QM-program for mRRHO contribution in part1 2 and 3, either 'xtb' or 'prog'.", + ) + group1.add_argument( + "-crestcheck", + "--crestcheck", + dest="crestcheck", + choices=["on", "off"], + action="store", + required=False, + metavar="", + help="Option to sort out conformers after DFT optimization which CREST " + "identifies as identical or rotamers of each other. \nThe identification/" + "analysis is always performed, but the removal of conformers has to " + "be the choice of the user. Options are: ['on' or 'off']", + ) + group1.add_argument( + "-check", + "--check", + dest="check", + choices=["on", "off"], + action="store", + required=False, + help="Option to terminate the ENSO-run if too many calculations/preparation" + " steps fail. Options are: ['on' or 'off'].", + ) + group1.add_argument( + "-version", + "--version", + dest="version", + action="store_true", + required=False, + help="Print CENSO version and exit.", + ) + group1.add_argument( + "-part3only", + "--part3only", + dest="part3only", + required=False, + action="store_true", + help="Option to turn off part1 and part2", + ) + group2 = parser.add_argument_group("SPECIAL RUN MODES") + group2.add_argument( + "-logK", + "--logK", + action="store_true", + required=False, + default=False, + help="Automatically set required settings for logK calculation. " + "Of course charge, solvent etc. 
has to be set by the user.", + ) + group10 = parser.add_argument_group("CRE CHEAP-PRESCREENING - PART0") + group10.add_argument( + "-part0", + "--part0", + choices=["on", "off"], + dest="part0", + action="store", + required=False, + metavar="", + help="Option to turn the CHEAP prescreening evaluation (part0) which " + "improves description of ΔE 'on' or 'off'.", + ) + group10.add_argument( + "-func0", + "--func0", + dest="func0", + choices=options.value_options["func0"], + action="store", + required=False, + metavar="", + help="Functional for fast single-point (used in part0)", + ) + group10.add_argument( + "-basis0", + "--basis0", + dest="basis0", + action="store", + required=False, + metavar="", + help="Basis set employed together with the functional (func0) for the " + "fast single point calculation in part0.", + ) + group10.add_argument( + "-part0_gfnv", + "--part0_gfnv", + dest="part0_gfnv", + choices=options.value_options["part0_gfnv"], + metavar="", + action="store", + required=False, + help="GFNn-xTB version employed for calculating the gas phase GFNn-xTB " + "single point in part0. " + f"Allowed values are [{', '.join(options.value_options['part0_gfnv'])}]", + ) + group10.add_argument( + "-part0_threshold", + "-thrpart0", + "--thresholdpart0", + dest="part0_threshold", + metavar="", + action="store", + required=False, + help=( + "Threshold in kcal/mol. All conformers in part0 (cheap single-point)" + " with a relativ energy below the threshold are considered for part1." 
+ ), + ) + + group3 = parser.add_argument_group("CRE PRESCREENING - PART1") + group3.add_argument( + "-part1", + "--part1", + choices=["on", "off"], + dest="part1", + action="store", + required=False, + metavar="", + help="Option to turn the prescreening evaluation (part1) 'on' or 'off'.", + ) + group3.add_argument( + "-smgsolv1", + "--smgsolv1", + choices=options.value_options["smgsolv1"], + dest="smgsolv1", + action="store", + required=False, + metavar="", + help="Solvent model for the Gsolv evaluation in part1. This can either be" + " an implicit solvation or an additive solvation model. " + f"Allowed values are [{', '.join(options.value_options['smgsolv1'])}]", + ) + group3.add_argument( + "-part1_gfnv", + "--part1_gfnv", + dest="part1_gfnv", + choices=options.value_options["part1_gfnv"], + metavar="", + action="store", + required=False, + help="GFNn-xTB version employed for calculating the " + "mRRHO contribution in part1. " + f"Allowed values are [{', '.join(options.value_options['part1_gfnv'])}]", + ) + group3.add_argument( + "-thrpart1", + "--thresholdpart1", + "-part1_threshold", + dest="part1_threshold", + metavar="", + action="store", + required=False, + help=( + "Threshold in kcal/mol. All conformers in part1 (lax_single-point)" + " with a relativ energy below the threshold are considered for part2." + ), + ) + + group4 = parser.add_argument_group("CRE OPTIMIZATION - PART2") + group4.add_argument( + "-part2", + "--part2", + choices=["on", "off"], + dest="part2", + action="store", + required=False, + metavar="", + help="Option to turn the full optimization (part2) 'on' or 'off'.", + ) + group4.add_argument( + "-sm2", + "--solventmodel2", + choices=options.value_options.get("sm2"), + dest="sm2", + action="store", + required=False, + metavar="", + help="Solvent model employed during the geometry optimization part2." + "The solvent model sm2 is not used for Gsolv evaluation, but for the " + "implicit effect on a property (e.g. 
the optimization).", + ) + group4.add_argument( + "-smgsolv2", + "--smgsolv2", + choices=options.value_options["smgsolv2"], + dest="smgsolv2", + action="store", + required=False, + metavar="", + help="Solvent model for the Gsolv calculation in part2. Either the solvent" + " model of the optimization (sm) or an additive solvation model. " + f"Allowed values are [{', '.join(options.value_options['smgsolv2'])}]", + ) + group4.add_argument( + "-part2_gfnv", + "--part2_gfnv", + dest="part2_gfnv", + choices=options.value_options["part2_gfnv"], + metavar="", + action="store", + required=False, + help="GFNn-xTB version employed for calculating the " + "mRRHO contribution in part2. " + f"Allowed values are [{', '.join(options.value_options['part2_gfnv'])}]", + ) + group4.add_argument( + "-ancopt", + choices=["on", "off"], + dest="ancopt", + required=False, + metavar="", + help="Option to use xtb as driver for the xTB-optimizer in part2.", + ) + group4.add_argument( + "-opt_spearman", + choices=["on", "off"], + dest="opt_spearman", + required=False, + metavar="", + help="Option to use an optimizer which checks if the hypersurface of DFT and" + "xTB is parallel and optimizes mainly low lying conformers", + ) + group4.add_argument( + "-optlevel2", + "--optlevel2", + choices=options.value_options["optlevel2"], + dest="optlevel2", + default=None, + required=False, + metavar="", + help="Option to set the optlevel in part2, only if optimizing with the xTB-optimizer!" 
+ "Allowed values are " + ", ".join(options.value_options["optlevel2"]), + ) + group4.add_argument( + "-optcycles", + "--optcycles", + dest="optcycles", + action="store", + required=False, + type=int, + metavar="", + help="number of cycles in ensemble optimizer.", + ) + group4.add_argument( + "-hlow", + "--hlow", + dest="hlow", + action="store", + required=False, + type=float, + metavar="", + help="Lowest force constant in ANC generation (real), used by xTB-optimizer.", + ) + group4.add_argument( + "-spearmanthr", + "-spearmanthr", + dest="spearmanthr", + action="store", + required=False, + metavar="", + help="Value between -1 and 1 for the spearman correlation coeffient threshold", + ) + group4.add_argument( + "-opt_limit", + "--opt_limit", + dest="opt_limit", + action="store", + required=False, + metavar="", + help=( + "Lower limit Threshold in kcal/mol. If the GFNn and DFT hypersurfaces are" + "assumed parallel, the conformers above the threshold are removed and not optimized further." + "The conformers in part2 with a relativ free energy below the " + "threshold are fully optimized." + ), + ) + group4.add_argument( + "-thrpart2", + "--thresholdpart2", + dest="part2_threshold", + action="store", + required=False, + metavar="", + help=( + "Boltzmann population sum threshold for part2 in %%. The conformers with " + "the highest Boltzmann weigths are summed up until the threshold is reached." + "E.g. all conformers up to a Boltzmann population of 90 %% are considered." 
+ 'Example usage: "-thrpart2 99" --> considers a population of 99 %%' + ), + ) + group4.add_argument( + "-radsize", + "--radsize", + dest="radsize", + action="store", + required=False, + metavar="", + type=int, + help=("Radsize used in optimization and only for r2scan-3c!"), + ) + group5 = parser.add_argument_group("CRE REFINEMENT - PART3") + group5.add_argument( + "-part3", + "--part3", + choices=["on", "off"], + dest="part3", + action="store", + required=False, + metavar="", + help="Option to turn the high level free energy evaluation (part3) 'on' or 'off'.", + ) + group5.add_argument( + "-prog3", + "--prog3", + choices=options.value_options["prog3"], + dest="prog3", + required=False, + metavar="", + help="QM-program used in part3 either 'orca' or 'tm'.", + ) + group5.add_argument( + "-func3", + "--functionalpart3", + dest="func3", + # choices=func3, + action="store", + required=False, + metavar="", + help="Functional for the COSMO-RS calculation, use functional " + "names as recognized by cefine.", + ) + group5.add_argument( + "-basis3", + "--basis3", + dest="basis3", + action="store", + required=False, + metavar="", + help="Basis set employed together with the functional (func3) for the " + "high level single point in part3.", + ) + group5.add_argument( + "-smgsolv3", + "--smgsolv3", + choices=options.value_options["smgsolv3"], + dest="smgsolv3", + action="store", + required=False, + metavar="", + help="Solvent model for the Gsolv calculation in part3. Either the solvent" + " model of the optimization (sm2) or an additive solvation model.", + ) + group5.add_argument( + "-part3_gfnv", + "--part3_gfnv", + dest="part3_gfnv", + choices=options.value_options["part3_gfnv"], + metavar="", + action="store", + required=False, + help="GFNn-xTB version employed for calculating the " + "mRRHO contribution in part3. 
" + f"Allowed values are [{', '.join(options.value_options['part3_gfnv'])}]", + ) + group5.add_argument( + "-thrpart3", + "--thresholdpart3", + dest="part3_threshold", + action="store", + required=False, + metavar="", + help=( + "Boltzmann population sum threshold for part3 in %%. The conformers with " + "the highest Boltzmann weigths are summed up until the threshold is reached." + "E.g. all conformers up to a Boltzmann population of 90 %% are considered" + 'Example usage: "-thrpart3 99" --> considers a population of 99 %%' + ), + ) + group6 = parser.add_argument_group("NMR Mode") + group6.add_argument( + "-part4", + "--part4", + choices=["on", "off"], + dest="part4", + action="store", + required=False, + metavar="", + help="Option to turn the NMR property calculation mode (part4) 'on' or 'off'.", + ) + group6.add_argument( + "-couplings", + "--couplings", + dest="couplings", + required=False, + choices=["on", "off"], + metavar="", + help="Option to run coupling constant calculations. Options are 'on' or 'off'.", + ) + group6.add_argument( + "-prog4J", + "--prog4J", + # choices=options.value_options["prog"], + dest="prog4_j", + required=False, + metavar="", + help="QM-program for the calculation of coupling constants.", + ) + group6.add_argument( + "-funcJ", + "--funcJ", + dest="func_j", + # choices=func3, + action="store", + required=False, + metavar="", + help="Functional for the coupling constant calculation.", + ) + group6.add_argument( + "-basisJ", + "--basisJ", + dest="basis_j", + action="store", + required=False, + metavar="", + help="Basis set for the calculation of coupling constants.", + ) + group6.add_argument( + "-sm4_j", + "--sm4_j", + dest="sm4_j", + action="store", + required=False, + metavar="", + help="Solvation model used in the coupling constant calculation.", + ) + group6.add_argument( + "-shieldings", + "--shieldings", + dest="shieldings", + required=False, + choices=["on", "off"], + metavar="", + help="Option to run shielding constant 
calculations. Options are 'on' or 'off'.", + ) + group6.add_argument( + "-prog4S", + "--prog4S", + # choices=options.value_options["prog"], + dest="prog4_s", + required=False, + metavar="", + help="QM-program for the calculation of shielding constants.", + ) + group6.add_argument( + "-funcS", + "--funcS", + dest="func_s", + # choices=func3, + action="store", + required=False, + metavar="", + help="Functional for shielding constant calculation.", + ) + group6.add_argument( + "-basisS", + "--basisS", + dest="basis_s", + action="store", + required=False, + metavar="", + help="Basis set for the calculation of shielding constants.", + ) + group6.add_argument( + "-sm4_s", + "--sm4_s", + dest="sm4_s", + action="store", + required=False, + metavar="", + help="Solvation model used in the shielding constant calculation.", + ) + group6.add_argument( + "-hactive", + "--hactive", + # choices=options.value_options["prog"], + dest="h_active", + required=False, + metavar="", + help="Investigates hydrogen nuclei in coupling and shielding calculations.", + ) + group6.add_argument( + "-cactive", + "--cactive", + # choices=options.value_options["prog"], + dest="c_active", + required=False, + metavar="", + help="Investigates carbon nuclei in coupling and shielding calculations.", + ) + group6.add_argument( + "-factive", + "--factive", + # choices=options.value_options["prog"], + dest="f_active", + required=False, + metavar="", + help="Investigates fluorine nuclei in coupling and shielding calculations.", + ) + group6.add_argument( + "-siactive", + "--siactive", + # choices=options.value_options["prog"], + dest="si_active", + required=False, + metavar="", + help="Investigates silicon nuclei in coupling and shielding calculations.", + ) + group6.add_argument( + "-pactive", + "--pactive", + # choices=options.value_options["prog"], + dest="p_active", + required=False, + metavar="", + help="Investigates phosophorus nuclei in coupling and shielding calculations.", + ) + group9 = 
parser.add_argument_group("OPTICAL ROTATION MODE") + group9.add_argument( + "-OR", + "--OR", + "-part5", + choices=["on", "off"], + action="store", + dest="optical_rotation", + required=False, + help="Do optical rotation calculation.", + ) + group9.add_argument( + "-funcOR", + "--funcOR", + dest="func_or", + # choices=func_or, + action="store", + required=False, + metavar="", + help="Functional for optical rotation calculation.", + ) + group9.add_argument( + "-funcOR_SCF", + "--funcOR_SCF", + dest="func_or_scf", + # choices=func_or, + action="store", + required=False, + metavar="", + help="Functional used in SCF for optical rotation calculation.", + ) + group9.add_argument( + "-basisOR", + "--basisOR", + dest="basis_or", + # choices=func_or, + action="store", + required=False, + metavar="", + help="Basis set for optical rotation calculation.", + ) + group9.add_argument( + "-freqOR", + "--freqOR", + dest="freq_or", + nargs="*", + required=False, + type=float, + metavar="", + help="Frequencies to evaluate specific rotation at in nm. E.g. 589 " + "Or 589 700 to evaluate at 598 nm and 700 nm.", + ) + + group7 = parser.add_argument_group("OPTIONS FOR PARALLEL CALCULATIONS") + group7.add_argument( + "-O", + "--omp", + dest="omp", + type=int, + action="store", + metavar="", + help="Number of cores each thread can use. E.g. (maxthreads) 5 threads " + "with each (omp) 4 cores --> 20 cores need to be available on the machine.", + ) + group7.add_argument( + "-P", + "--maxthreads", + dest="maxthreads", + type=int, + action="store", + metavar="", + help="Number of threads during the ENSO calculation. E.g. 
(maxthreads) 5" + " threads with each (omp) 4 cores --> 20 cores need to be available on " + "the machine.", + ) + group8 = parser.add_argument_group("CREATION/DELETION OF FILES") + group8.add_argument( + "--debug", + "-debug", + dest="debug", + action="store_true", + default=False, + help=argparse.SUPPRESS, + ) + group8.add_argument( + "--restart", + "-restart", + dest="restart", + action="store_true", + default=False, + help=argparse.SUPPRESS, + ) + group8.add_argument( + "--cleanup", + "-cleanup", + dest="cleanup", + action="store_true", + default=False, + help="Delete unneeded files from current working directory.", + ) + group8.add_argument( + "--cleanup_all", + "-cleanup_all", + dest="cleanup_all", + action="store_true", + default=False, + help="Delete all unneeded files from current working directory. " + "Stronger than -cleanup !", + ) + group8.add_argument( + "-newconfig", + "-write_ensorc", + "--write_ensorc", + dest="writeconfig", + default=False, + action="store_true", + required=False, + help="Write new configuration file , which is placed into the current " + "directory.", + ) + + args = parser.parse_args(argv) + + # apply logK settings but don't override user input! + if args.logK: + logk_settings = OrderedDict( + [ + # general/cross-over settings + ("multitemp", "on"), + ("evaluate_rrho", "on"), + ("bhess", "on"), + ("crestcheck", "on"), + # part 1 + ("part1", "on"), + ("smgsolv1", "cosmors"), + # part2 + ("part2", "on"), + ("ancopt", "on"), + ("smgsolv2", "cosmors"), + ("opt_spearman", "on"), + ("spearmanthr", -4), + # part3 + ("smgsolv3", "cosmors"), + ] + ) + for key in logk_settings.keys(): + if not getattr(args, key): + setattr(args, key, logk_settings[key]) + # --------------------------end logK---------------------------------------- + if args.part3only: + setattr(args, "part0", "off") + setattr(args, "part1", "off") + setattr(args, "part2", "off") + return args + + +class internal_settings: + """ + All options are saved here. 
+ """ + + # key in .censorc corresponds to name in cml + key_args_dict = { + "nconf": "nconf", + "charge": "charge", + "unpaired": "unpaired", + "solvent": "solvent", + "prog": "prog", + "ancopt": "ancopt", + "opt_spearman": "opt_spearman", + "evaluate_rrho": "evaluate_rrho", + "consider_sym": "consider_sym", + "prog_rrho": "prog_rrho", + "part0_gfnv": "part0_gfnv", + "part1_gfnv": "part1_gfnv", + "part2_gfnv": "part2_gfnv", + "part3_gfnv": "part3_gfnv", + "temperature": "temperature", + "multitemp": "multitemp", + "trange": "trange", + "prog3": "prog3", + "prog4_j": "prog4_j", + "prog4_s": "prog4_s", + "part0": "part0", + "part1": "part1", + "part2": "part2", + "part3": "part3", + "part4": "part4", + "func0": "func0", + "func": "func", + "basis0": "basis0", + "basis": "basis", + "func3": "func3", + "basis3": "basis3", + "couplings": "couplings", + "progJ": "prog4_j", + "funcJ": "func_j", + "basisJ": "basis_j", + "shieldings": "shieldings", + "progS": "prog4_s", + "funcS": "func_s", + "basisS": "basis_s", + "part0_threshold": "part0_threshold", + "part1_threshold": "part1_threshold", + "part2_threshold": "part2_threshold", + "part3_threshold": "part3_threshold", + "opt_limit": "opt_limit", + "smgsolv1": "smgsolv1", + "sm2": "sm2", + "smgsolv2": "smgsolv2", + "smgsolv3": "smgsolv3", + "sm4J": "sm4_j", + "sm4S": "sm4_s", + "check": "check", + "crestcheck": "crestcheck", + "maxthreads": "maxthreads", + "omp": "omp", + "1H_active": "h_active", + "13C_active": "c_active", + "19F_active": "f_active", + "31P_active": "p_active", + "29Si_active": "si_active", + "resonance_frequency": "resonance_frequency", + "reference_1H": "h_ref", + "reference_13C": "c_ref", + "reference_31P": "p_ref", + "reference_19F": "f_ref", + "reference_29Si": "si_ref", + "bhess": "bhess", + "sm_rrho": "sm_rrho", + "optcycles": "optcycles", + "optlevel2": "optlevel2", + "spearmanthr": "spearmanthr", + "optical_rotation": "optical_rotation", + "radsize": "radsize", + "frequency_optical_rot": 
"freq_or", + "funcOR": "func_or", + "basisOR": "basis_or", + "funcOR_SCF": "func_or_scf", + "hlow": "hlow", + "rmsdbias": "rmsdbias", + } + knownbasissets3 = [ + "SVP", + "SV(P)", + "TZVP", + "TZVPP", + "QZVP", + "QZVPP", + "def2-SV(P)", + "def2-mSVP", + "def2-SVP", + "def2-TZVP", + "def2-TZVPP", + "def2-mTZVP", + "def2-mTZVPP", + "def2-TZVPD", + "def-SVP", + "def-SV(P)", + "def2-QZVP", + "DZ", + "QZV", + "cc-pVDZ", + "cc-pVTZ", + "cc-pVQZ", + "cc-pV5Z", + "aug-cc-pVDZ", + "aug-cc-pVTZ", + "aug-cc-pVQZ", + "aug-cc-pV5Z", + "def2-QZVPP", + "minix", + ] + # information on functionals: + composite_method_basis = { + "pbeh-3c": "def2-mSVP", + "b97-3c": "def2-mTZVP", + "b973c": "def2-mTZVP", + "hf3c": "minix", + "hf-3c": "minix", + "r2scan-3c": "def2-mTZVPP", + } + composite_dfa = ( + "pbeh-3c", + "b97-3c", + "b973c", + "hf-3c", + "hf3c", + "r2scan-3c", + ) # + hf3c ; ) + gga_dfa = ("tpss", "pbe", "kt2") + hybrid_dfa = ( + "pbe0", + "pw6b95", + "wb97x-d3", + "cam-b3lyp", + "b3-lyp", + "pbeh-3c", + "m06x", + "bh-lyp", + "tpssh", + ) + dh_dfa = ("dsd-blyp",) + + knownbasissetsJ = knownbasissets3 + ["pcJ-0", "pcJ-1", "pcJ-2"] + knownbasissetsS = knownbasissets3 + [ + "pcSseg-0", + "pcSseg-1", + "pcSseg-2", + "pcSseg-3", + "x2c-SVPall-s", + "x2c-TZVPall-s", + ] + func_orca = ["pbeh-3c", "b97-3c", "tpss", "b97-d3", "pbe"] + func_tm = ["pbeh-3c", "b97-3c", "tpss", "r2scan-3c", "b97-d", "pbe"] + func3_orca = ["pw6b95", "pbe0", "wb97x", "dsd-blyp"] + func3_tm = ["pw6b95", "pbe0", "b97-d3", "r2scan-3c"] + func_j_tm = ["tpss", "pbe0", "pbeh-3c"] + func_j_orca = ["tpss", "pbe0", "pbeh-3c"] + func_s_tm = ["tpss", "pbe0", "pbeh-3c", "kt2"] + func_s_orca = ["tpss", "pbe0", "dsd-blyp", "pbeh-3c", "kt2"] + impgfnv = ["gfn1", "gfn2", "gfnff"] + tmp_smd_solvents = [ + "1,1,1-TRICHLOROETHANE", + "1,1,2-TRICHLOROETHANE", + "1,2,4-TRIMETHYLBENZENE", + "1,2-DIBROMOETHANE", + "1,2-DICHLOROETHANE", + "1,2-ETHANEDIOL", + "1,4-DIOXANE", + "1-BROMO-2-METHYLPROPANE", + "1-BROMOOCTANE", + 
"1-BROMOPENTANE", + "1-BROMOPROPANE", + "1-BUTANOL", + "1-CHLOROHEXANE", + "1-CHLOROPENTANE", + "1-CHLOROPROPANE", + "1-DECANOL", + "1-FLUOROOCTANE", + "1-HEPTANOL", + "1-HEXANOL", + "1-HEXENE", + "1-HEXYNE", + "1-IODOBUTANE", + "1-IODOHEXADECANE", + "1-IODOPENTANE", + "1-IODOPROPANE", + "1-NITROPROPANE", + "1-NONANOL", + "1-OCTANOL", + "1-PENTANOL", + "1-PENTENE", + "1-PROPANOL", + "2,2,2-TRIFLUOROETHANOL", + "2,2,4-TRIMETHYLPENTANE", + "2,4-DIMETHYLPENTANE", + "2,4-DIMETHYLPYRIDINE", + "2,6-DIMETHYLPYRIDINE", + "2-BROMOPROPANE", + "2-BUTANOL", + "2-CHLOROBUTANE", + "2-HEPTANONE", + "2-HEXANONE", + "2-METHOXYETHANOL", + "2-METHYL-1-PROPANOL", + "2-METHYL-2-PROPANOL", + "2-METHYLPENTANE", + "2-METHYLPYRIDINE", + "2-NITROPROPANE", + "2-OCTANONE", + "2-PENTANONE", + "2-PROPANOL", + "2-PROPEN-1-OL", + "E-2-PENTENE", + "3-METHYLPYRIDINE", + "3-PENTANONE", + "4-HEPTANONE", + "4-METHYL-2-PENTANONE", + "4-METHYLPYRIDINE", + "5-NONANONE", + "ACETIC ACID", + "ACETONE", + "ACETONITRILE", + "ACETOPHENONE", + "ANILINE", + "ANISOLE", + "BENZALDEHYDE", + "BENZENE", + "BENZONITRILE", + "BENZYL ALCOHOL", + "BROMOBENZENE", + "BROMOETHANE", + "BROMOFORM", + "BUTANAL", + "BUTANOIC ACID", + "BUTANONE", + "BUTANONITRILE", + "BUTYL ETHANOATE", + "BUTYLAMINE", + "N-BUTYLBENZENE", + "SEC-BUTYLBENZENE", + "TERT-BUTYLBENZENE", + "CARBON DISULFIDE", + "CARBON TETRACHLORIDE", + "CHLOROBENZENE", + "CHLOROFORM", + "A-CHLOROTOLUENE", + "O-CHLOROTOLUENE", + "M-CRESOL", + "O-CRESOL", + "CYCLOHEXANE", + "CYCLOHEXANONE", + "MeCN", + "CCl4", + "CYCLOPENTANE", + "CYCLOPENTANOL", + "CYCLOPENTANONE", + "DECALIN (CIS/TRANS MIXTURE)", + "CIS-DECALIN", + "N-DECANE", + "DIBROMOMETHANE", + "DIBUTYLETHER", + "O-DICHLOROBENZENE", + "E-1,2-DICHLOROETHENE", + "Z-1,2-DICHLOROETHENE", + "DICHLOROMETHANE", + "DIETHYL ETHER", + "DIETHYL SULFIDE", + "DIETHYLAMINE", + "DIIODOMETHANE", + "DIISOPROPYL ETHER", + "CIS-1,2-DIMETHYLCYCLOHEXANE", + "DIMETHYL DISULFIDE", + "N,N-DIMETHYLACETAMIDE", + "N,N-DIMETHYLFORMAMIDE", + 
"DIMETHYLSULFOXIDE", + "DIPHENYLETHER", + "DIPROPYLAMINE", + "N-DODECANE", + "ETHANETHIOL", + "ETHANOL", + "ETHYL ETHANOATE", + "ETHYL METHANOATE", + "ETHYL PHENYL ETHER", + "ETHYLBENZENE", + "FLUOROBENZENE", + "FORMAMIDE", + "FORMIC ACID", + "N-HEPTANE", + "N-HEXADECANE", + "N-HEXANE", + "HEXANOIC ACID", + "IODOBENZENE", + "IODOETHANE", + "IODOMETHANE", + "ISOPROPYLBENZENE", + "P-ISOPROPYLTOLUENE", + "MESITYLENE", + "METHANOL", + "METHYL BENZOATE", + "METHYL BUTANOATE", + "METHYL ETHANOATE", + "METHYL METHANOATE", + "METHYL PROPANOATE", + "N-METHYLANILINE", + "METHYLCYCLOHEXANE", + "N-METHYLFORMAMIDE", + "NITROBENZENE", + "NITROETHANE", + "NITROMETHANE", + "O-NITROTOLUENE", + "N-NONANE", + "N-OCTANE", + "N-PENTADECANE", + "PENTANAL", + "N-PENTANE", + "PENTANOIC ACID", + "PENTYL ETHANOATE", + "PENTYLAMINE", + "PERFLUOROBENZENE", + "PROPANAL", + "PROPANOIC ACID", + "PROPANONITRILE", + "PROPYL ETHANOATE", + "PROPYLAMINE", + "PYRIDINE", + "TETRACHLOROETHENE", + "TETRAHYDROFURAN", + "TETRAHYDROTHIOPHENE-S,S-DIOXIDE", + "TETRALIN", + "THIOPHENE", + "THIOPHENOL", + "TOLUENE", + "TRANS-DECALIN", + "TRIBUTYLPHOSPHATE", + "TRICHLOROETHENE", + "TRIETHYLAMINE", + "N-UNDECANE", + "WATER", + "XYLENE (MIXTURE)", + "M-XYLENE", + "O-XYLENE", + "P-XYLENE", + "DMF", + "DMSO", + "PhNO2", + "MeNO2", + "THF", + ] + solvents_smd = [i.lower() for i in tmp_smd_solvents] + solvents_xtb = [ + "acetone", + "acetonitrile", + "aniline", + "benzaldehyde", + "benzene", + "chcl3", + "ch2cl2", + "ccl4", + "cs2", + "dioxane", + "dmf", + "dmso", + "ether", + "ethylacetate", + "furane", + "hexadecane", + "hexane", + "h2o", + "water", + "methanol", + "nitromethane", + "thf", + "toluene", + "octanol", + "woctanol", + "phenol", + ] + solvents_cpcm = [ + "water", + "acetone", + "acetonitrile", + "ammonia", + "benzene", + "chloroform", + "ch2cl2", + "ccl4", + "cyclohexane", + "dmf", + "dmso", + "ethanol", + "hexane", + "methanol", + "octanol", + "pyridine", + "thf", + "toluene", + ] + solvents_cosmors = [ 
+ "propanone_c0", + "chcl3_c0", + "acetonitrile_c0", + "ch2cl2_c0", + "dimethylsulfoxide_c0", + "h2o_c0", + "methanol_c0", + "thf_c0", + "toluene_c0", + "1-octanol_c0", + "woctanol", # this is a mixture and treated differently + "n-hexadecane_c0", + "dimethylformamide_c0", + "aniline_c0", + "cyclohexane_c0", + "ccl4_c0", + "diethylether_c0", + "ethanol_c0", + "hexane_c0", + "nitromethane_c0", + "benzaldehyde_c0", + "benzene_c0", + "cs2_c0", + "dioxane_c0", + "ethylacetate_c0", + "furane_c0", + "phenol_c0", + ] + + # only using the dielectric constant (DC) for cosmo + + # dcosmorsfile name = e.g. acetonitrile + '_25.pot' + solvents_dcosmors = [ + "acetonitrile", + "aniline", + "benzene", + "ccl4", + "chcl3", + "cyclohexane", + "diethylether", + "dimethylsulfoxide", + "ethanol", + "h2o", + "hexadecane", + "hexane", + "methanol", + "nitromethane", + "octanol", + "propanone", + "thf", + "toluene", + "wet-octanol", + ] + + smgsolv_1 = ["cosmors", "cosmors-fine", "gbsa_gsolv", "alpb_gsolv", "smd_gsolv"] + sm2_tm = ["cosmo", "dcosmors"] + sm2_orca = ["cpcm", "smd"] + smgsolv_2 = ["cosmors", "cosmors-fine", "gbsa_gsolv", "alpb_gsolv", "smd_gsolv"] + smgsolv3_tm = ["cosmo", "dcosmors"] + smgsolv3_orca = ["cpcm", "smd"] + smgsolv_3 = ["cosmors", "cosmors-fine", "gbsa_gsolv", "alpb_gsolv", "smd_gsolv"] + sm4_j_tm = ["cosmo", "dcosmors"] + sm4_s_tm = ["cosmo", "dcosmors"] + sm4_j_orca = ["cpcm", "smd"] + sm4_s_orca = ["cpcm", "smd"] + + imphref = ["TMS"] + impcref = ["TMS"] + impfref = ["CFCl3"] + imppref = ["TMP", "PH3"] + impsiref = ["TMS"] + + func_basis_default = { + "pbeh-3c": "def2-mSVP", + "b97-3c": "def2-mTZVP", + "b973c": "def2-mTZVP", + "tpss": "def2-TZVP", + "r2scan-3c": "def2-mTZVPP", + "hf-3c": "minix", + "hf3c": "minix", + } + + def __init__(self): + self.impfunc = list(set(self.func_orca + self.func_tm)) + self.impfunc3 = list(set(self.func3_orca + self.func3_tm)) + self.impfunc_j = list(set(self.func_j_orca + self.func_j_tm)) + self.impfunc_s = 
list(set(self.func_s_orca + self.func_s_tm)) + self.impsm2 = list(set(self.sm2_orca + self.sm2_tm + ["default"])) + self.impsmgsolv1 = list( + set(self.sm2_orca + self.sm2_tm + self.smgsolv_2 + ["sm2"]) + ) + self.impsmgsolv2 = list( + set(self.sm2_orca + self.sm2_tm + self.smgsolv_2 + ["sm2"]) + ) + self.impsmgsolv3 = list( + set(self.sm2_orca + self.sm2_tm + self.smgsolv_2 + ["sm2"]) + ) + self.impsm4_j = list(set(self.sm4_j_orca + self.sm4_j_tm)) + self.impsm4_s = list(set(self.sm4_s_orca + self.sm4_s_tm)) + + self.defaults_refine_ensemble_general = [ + # general settings + ("nconf", {"default": None, "type": int}), + ("charge", {"default": 0, "type": int}), + ("unpaired", {"default": 0, "type": int}), + ("solvent", {"default": "gas", "type": str}), + ("prog_rrho", {"default": "xtb", "type": str}), + ("temperature", {"default": 298.15, "type": float}), + ("trange", {"default": [273.15, 378.15, 5], "type": list}), + ("multitemp", {"default": True, "type": bool}), + ("evaluate_rrho", {"default": True, "type": bool}), + ("consider_sym", {"default": False, "type": bool}), + ("bhess", {"default": True, "type": bool}), + ("rmsdbias", {"default": False, "type": bool}), + ("sm_rrho", {"default": "alpb", "type": str}), + ("check", {"default": True, "type": bool}), + ("prog", {"default": "tm", "type": str}), + ("func", {"default": "r2scan-3c", "type": str}), + ("basis", {"default": "automatic", "type": str}), + ("maxthreads", {"default": 1, "type": int}), + ("omp", {"default": 1, "type": int}), + ] + self.defaults_refine_ensemble_part0 = [ + # part0 + ("part0", {"default": True, "type": bool}), + ("func0", {"default": "b97-d", "type": str}), + ("basis0", {"default": "def2-SV(P)", "type": str}), + ("part0_gfnv", {"default": "gfn2", "type": str}), + ("part0_threshold", {"default": 4.0, "type": float}), + ] + self.defaults_refine_ensemble_part1 = [ + # part1 + ("part1", {"default": True, "type": bool}), + ("smgsolv1", {"default": "cosmors", "type": str}), # previously sm2 + 
("part1_gfnv", {"default": "gfn2", "type": str}), + ("part1_threshold", {"default": 3.5, "type": float}), + ] + self.defaults_refine_ensemble_part2 = [ + # part2 + ("part2", {"default": True, "type": bool}), + ("opt_limit", {"default": 2.5, "type": float}), + ("sm2", {"default": "default", "type": str}), + ("smgsolv2", {"default": "cosmors", "type": str}), # previously sm2 + ("part2_gfnv", {"default": "gfn2", "type": str}), + ("ancopt", {"default": True, "type": bool}), + ("hlow", {"default": 0.01, "type": float}), + ("opt_spearman", {"default": True, "type": bool}), + ("part2_threshold", {"default": 99, "type": float}), + ("optlevel2", {"default": "automatic", "type": str}), + ("optcycles", {"default": 8, "type": int}), + ("spearmanthr", {"default": -4.0, "type": float}), + ("radsize", {"default": 10, "type": int}), + ("crestcheck", {"default": False, "type": bool}), + ] + self.defaults_refine_ensemble_part3 = [ + # part3 + ("part3", {"default": False, "type": bool}), + ("prog3", {"default": "prog", "type": str}), + ("func3", {"default": "pw6b95", "type": str}), # previously b97-d + ("basis3", {"default": "def2-TZVPD", "type": str}), + ("smgsolv3", {"default": "cosmors", "type": str}), # previously sm2 + ("part3_gfnv", {"default": "gfn2 ", "type": str}), + ("part3_threshold", {"default": 99, "type": float}), + ] + self.defaults_nmrprop_part4 = [ + # part4 + ("part4", {"default": False, "type": bool}), + ("couplings", {"default": True, "type": bool}), + ("prog4_j", {"default": "prog", "type": str}), + ("func_j", {"default": "pbe0", "type": str}), + ("basis_j", {"default": "def2-TZVP", "type": str}), + ("sm4_j", {"default": "default", "type": str}), + ("shieldings", {"default": True, "type": bool}), + ("prog4_s", {"default": "prog", "type": str}), + ("func_s", {"default": "pbe0", "type": str}), + ("basis_s", {"default": "def2-TZVP", "type": str}), + ("sm4_s", {"default": "default", "type": str}), + ("h_ref", {"default": "TMS", "type": str}), + ("c_ref", {"default": 
"TMS", "type": str}), + ("f_ref", {"default": "CFCl3", "type": str}), + ("si_ref", {"default": "TMS", "type": str}), + ("p_ref", {"default": "TMP", "type": str}), + ("h_active", {"default": True, "type": bool}), + ("c_active", {"default": True, "type": bool}), + ("f_active", {"default": False, "type": bool}), + ("si_active", {"default": False, "type": bool}), + ("p_active", {"default": False, "type": bool}), + ("resonance_frequency", {"default": 300.0, "type": float}), + ] + self.defaults_optical_rotation_part5 = [ + # part5 + ("optical_rotation", {"default": False, "type": bool}), + ("func_or", {"default": "pbe", "type": str}), + ("func_or_scf", {"default": "r2scan-3c", "type": str}), + ("basis_or", {"default": "def2-SVPD", "type": str}), + ("freq_or", {"default": [589.0], "type": list}), + ] + + self.internal_defaults = OrderedDict( + self.defaults_refine_ensemble_general + + self.defaults_refine_ensemble_part0 + + self.defaults_refine_ensemble_part1 + + self.defaults_refine_ensemble_part2 + + self.defaults_refine_ensemble_part3 + + self.defaults_nmrprop_part4 + + self.defaults_optical_rotation_part5 + ) + + # update internal defaults specific to QM package + # orca + self.internal_defaults_orca = deepcopy(self.internal_defaults) + self.internal_defaults_orca["sm2"]["default"] = "smd" + self.internal_defaults_orca["smgsolv1"]["default"] = "smd" + self.internal_defaults_orca["smgsolv2"]["default"] = "smd" + self.internal_defaults_orca["smgsolv3"]["default"] = "smd" + self.internal_defaults_orca["sm4_j"]["default"] = "smd" + self.internal_defaults_orca["sm4_s"]["default"] = "smd" + self.internal_defaults_orca["basis"]["default"] = "def2-TZVP(-f)" + self.internal_defaults_orca["basis3"]["default"] = "def2-TZVP(-f)" + # tm + self.internal_defaults_tm = deepcopy(self.internal_defaults) + self.internal_defaults_tm["sm2"]["default"] = "dcosmors" + self.internal_defaults_tm["smgsolv1"]["default"] = "dcosmors" + self.internal_defaults_tm["smgsolv2"]["default"] = 
"dcosmors" + self.internal_defaults_tm["smgsolv3"]["default"] = "dcosmors" + self.internal_defaults_tm["sm4_j"]["default"] = "dcosmors" + self.internal_defaults_tm["sm4_s"]["default"] = "dcosmors" + + self.value_options = { + "nconf": ["all", "number e.g. 10 up to all conformers"], + "charge": ["number e.g. 0"], + "unpaired": ["number e.g. 0"], + "solvent": ["gas"] + [i for i in censo_solvent_db.keys()], + "prog": ["tm", "orca"], + "part0": ["on", "off"], + "part1": ["on", "off"], + "part2": ["on", "off"], + "part3": ["on", "off"], + "part4": ["on", "off"], + "optical_rotation": ["on", "off"], + "prog3": ["tm", "orca", "prog"], + "ancopt": ["on", "off"], + "opt_spearman": ["on", "off"], + "evaluate_rrho": ["on", "off"], + "consider_sym": ["on", "off"], + "prog_rrho": ["xtb", "prog"], + "part0_gfnv": self.impgfnv, + "part1_gfnv": self.impgfnv, + "part2_gfnv": self.impgfnv, + "part3_gfnv": self.impgfnv, + "temperature": ["temperature in K e.g. 298.15"], + "multitemp": ["on", "off"], + "trange": ["temperature range [start, end, step]"], + "func0": self.impfunc, + "basis0": ["automatic"] + list(self.func_basis_default.values()), + "func": self.impfunc, + "basis": ["automatic"] + list(self.func_basis_default.values()), + "func3": self.impfunc3, + "basis3": self.knownbasissets3, + "part0_threshold": ["number e.g. 4.0"], + "part1_threshold": ["number e.g. 5.0"], + "opt_limit": ["number e.g. 4.0"], + "part2_threshold": [ + "Boltzmann sum threshold in %. e.g. 95 (between 1 and 100)" + ], + "part3_threshold": [ + "Boltzmann sum threshold in %. e.g. 95 (between 1 and 100)" + ], + "sm2": self.impsm2, + "smgsolv3": self.impsmgsolv3, + "sm4_j": self.impsm4_j, + "sm4_s": self.impsm4_s, + "check": ["on", "off"], + "crestcheck": ["on", "off"], + "maxthreads": ["number of threads e.g. 2"], + "omp": ["number cores per thread e.g. 
4"], + "smgsolv1": self.impsmgsolv1, + "smgsolv2": self.impsmgsolv2, + "bhess": ["on", "off"], + "rmsdbias": ["on", "off"], + "sm_rrho": ["alpb", "gbsa"], + "optcycles": ["number e.g. 5 or 10"], + "optlevel2": [ + "crude", + "sloppy", + "loose", + "lax", + "normal", + "tight", + "vtight", + "extreme", + "automatic", + ], + "spearmanthr": ["value between -1 and 1, if outside set automatically"], + "couplings": ["on", "off"], + "prog4_j": ["tm", "orca", "adf", "prog"], + "prog4_s": ["tm", "orca", "adf", "prog"], + "func_j": self.impfunc_j, + "basis_j": self.knownbasissetsJ, + "func_s": self.impfunc_s, + "basis_s": self.knownbasissetsS, + "h_ref": self.imphref, + "c_ref": self.impcref, + "f_ref": self.impfref, + "si_ref": self.impsiref, + "p_ref": self.imppref, + "h_active": ["on", "off"], + "c_active": ["on", "off"], + "f_active": ["on", "off"], + "p_active": ["on", "off"], + "si_active": ["on", "off"], + "resonance_frequency": [ + "MHz number of your experimental spectrometer setup" + ], + "shieldings": ["on", "off"], + "radsize": ["number e.g. 8 or 10"], + "func_or": ["functional for opt_rot e.g. pbe"], + "func_or_scf": ["functional for SCF in opt_rot e.g. r2scan-3c"], + "basis_or": ["basis set for opt_rot e.g. def2-SVPD"], + "freq_or": ["list of freq in nm to evaluate opt rot at e.g. [589, 700]"], + "hlow": ["lowest force constant in ANC generation, e.g. 
def __init__(self, path=None, *args, **kwargs):
    """
    Set up the configuration object with all run settings at their defaults.

    path   -- working directory of the run; when omitted (None) the current
              working directory at *call* time is used (previously the
              default was frozen at import time).
    *args, **kwargs are forwarded unchanged to internal_settings.__init__.
    """
    internal_settings.__init__(self, *args, **kwargs)
    # settings just to calm down pylint, real assignment is dynamically done
    # (see the setattr loop below, which overwrites every key that is
    # present in self.internal_defaults)
    # general settings
    self.nconf = None
    self.charge = 0
    self.unpaired = 0
    self.solvent = "gas"
    self.prog_rrho = "xtb"
    self.temperature = 298.15
    self.trange = [273.15, 378.15, 5]
    self.multitemp = False
    self.evaluate_rrho = True
    self.bhess = True
    self.consider_sym = False
    self.sm_rrho = "alpb"
    self.check = True
    self.crestcheck = False
    self.prog = "tm"
    self.func = "b97-3c"
    self.basis = "automatic"
    self.maxthreads = 1
    self.omp = 1
    # part0
    self.part0 = False
    self.part0_gfnv = "gfnff"
    self.part0_threshold = 4.0
    self.func0 = "b97-d"
    self.basis0 = "def2-SV(P)"
    # part1
    self.part1 = True
    self.smgsolv1 = "sm2"
    self.part1_gfnv = "gfnff"
    self.part1_threshold = 1.0
    # part2
    self.part2 = True
    self.part2_threshold = 90
    self.sm2 = "default"
    self.smgsolv2 = "sm2"
    self.part2_gfnv = "gfnff"
    self.ancopt = True
    self.hlow = 0.01
    self.opt_spearman = False
    self.optcycles = 5
    self.optlevel2 = "automatic"
    self.spearmanthr = 0.9999
    self.radsize = 8
    # part3
    self.part3 = True
    self.prog3 = "prog"
    self.func3 = "b97-d"
    self.basis3 = "def2-TZVPD"
    self.smgsolv3 = "sm2"
    self.part3_gfnv = "gfn2"
    # part4
    self.part4 = False
    self.prog4_j = "tm"
    self.prog4_s = "tm"
    self.couplings = True
    self.func_j = "pbe0"
    self.basis_j = "def2-TZVP"
    self.shieldings = True
    self.func_s = "pbe0"
    self.basis_s = "def2-TZVP"
    self.sm4_j = "default"
    self.sm4_s = "default"
    self.h_ref = "TMS"
    self.c_ref = "TMS"
    self.f_ref = "CFCl3"
    self.si_ref = "TMS"
    self.p_ref = "TMP"
    self.resonance_frequency = 300.0
    # part5
    self.optical_rotation = False
    self.func_or = "pbe"
    self.func_or_scf = "r2scan-3c"
    self.basis_or = "def2-SVPD"
    self.freq_or = [589]

    # settings the program operates with updated to the defaults
    for key, entry in self.internal_defaults.items():
        setattr(self, key, entry["default"])

    # working directory: resolved now, not when the module was imported
    self.cwd = os.getcwd() if path is None else path
    self.ensemblepath = ""
    self.configpath = ""
    self.jsonpath = ""

    # formatting:
    self.lenconfx = 3

    # messages collected during setup; printed later at startup
    self.save_errors = []
    self.save_infos = []

    # run information stored in addition to the internal defaults
    self.startupinfokeys = ["nat", "md5", "maxconf", "run"]
    self.nat = 0
    self.md5 = ""
    self.maxconf = 0
    self.run = True
    self.nmrmode = False

    # pathsdefaults: --> read_program_paths
    self.external_paths = {
        "orcapath": "",
        "orcaversion": "",
        "xtbpath": "",
        "crestpath": "",
        "cosmorssetup": "",
        "dbpath": "",
        "cosmothermversion": "",
        "mpshiftpath": "",
        "escfpath": "",
    }
def get_method_name(
    self,
    jobtype,
    func=None,
    basis=None,
    sm=None,
    gfn_version=None,
    bhess=None,
    solvent=None,
    prog=None,
    func2=None,
    disp=None,
):
    """
    Create the method names used for storing and retrieving data.

    Returns a tuple ``(method, method2)``:
      method  --> label of the energy method
      method2 --> label of the gsolv method ("" if the jobtype has none)

    func and basis are required for every jobtype except "rrhoxtb"; the
    remaining keyword arguments are only read by the jobtypes that use
    them. Raises Exception for an unknown jobtype.
    """
    if func is not None and basis is not None:
        if (
            func in self.composite_method_basis
            and basis == self.func_basis_default.get(func)
        ):
            # composite method with its own basis (e.g. r2scan-3c)
            tmp_func_basis = func
        elif disp is not None:
            # FUNC-DISP/BASIS
            tmp_func_basis = f"{func}-{disp}/{basis}"
        else:
            # FUNC/BASIS
            tmp_func_basis = f"{func}/{basis}"

    # most jobtypes have no separate gsolv label
    method2 = ""
    if jobtype in ("cosmors",):
        exc_name = {"cosmors": "COSMO-RS-normal", "cosmors-fine": "COSMO-RS-fine"}
        # energy FUNC/BASIS
        method = tmp_func_basis
        # cosmors gsolv COSMO-RS[FUNC/BASIS]
        method2 = f"{exc_name.get(sm)}[{tmp_func_basis}]"
    elif jobtype in ("gbsa_gsolv", "alpb_gsolv"):
        # energy FUNC/BASIS
        method = tmp_func_basis
        # e.g. ALPB_Gsolv[GFN2]
        method2 = f"{sm}[{gfn_version}]"
    elif jobtype == "sp":
        # energy FUNC/BASIS
        method = tmp_func_basis
    elif jobtype == "sp_implicit":
        # energy FUNC/BASIS[DCOSMORS]
        method = f"{tmp_func_basis}[{str(sm).upper()}]"
        method2 = "incl. in E"
    elif jobtype == "smd_gsolv":
        # energy FUNC/BASIS
        method = tmp_func_basis
        # SMD_gsolv SMD_GSOLV[FUNC/BASIS]
        method2 = f"{sm}[{tmp_func_basis}]"
    elif jobtype == "rrhoxtb":
        # e.g. GFN2[alpb]-bhess
        method = str(gfn_version).upper()
        if solvent != "gas":
            method += f"[{sm}]"
        if bhess:
            method += "-bhess"
    elif jobtype in ("opt", "xtbopt"):
        if solvent == "gas":
            # energy FUNC/BASIS
            method = tmp_func_basis
        else:
            # energy FUNC/BASIS[DCOSMORS]
            method = f"{tmp_func_basis}[{str(sm).upper()}]"
    elif jobtype in ("couplings", "couplings_sp", "shieldings", "shieldings_sp"):
        if solvent == "gas":
            method = f"{tmp_func_basis}-{prog}"
        else:
            method = f"{tmp_func_basis}[{str(sm).upper()}]-{prog}"
    elif jobtype in ("opt-rot", "opt-rot_sp"):
        if solvent == "gas":
            method = f"{tmp_func_basis}_[SCF={func2}]({prog})"
        else:
            method = f"{tmp_func_basis}[{str(sm).upper()}]_[SCF={func2}]({prog})"
    else:
        raise Exception(f"JOBTYPE {jobtype} not known in get_method_name")
    return method, method2
key in cmlflags.keys(): + if key in args_key.keys(): + if cmlflags[key] is not None: + # print(f"SETTING cml: {key} to {cmlflags[key]}") + rcdata[key] = cmlflags[key] + # print(key, cmlflags[key]) + # end get commandline arguments + # update censorc-key to internal key + for key, value in list(rcdata.items()): + if key in self.key_args_dict.keys(): + if key != self.key_args_dict[key]: + # print(f"updating: {key} to {self.key_args_dict[key]} " + # "{value} {rcdata.get(self.key_args_dict[key])}") + rcdata[self.key_args_dict[key]] = rcdata.get( + self.key_args_dict[key], value + ) + del rcdata[key] + # end update censorc-key to internal key + + readinkeys = [] + for item in list(rcdata.keys()): + if item not in self.internal_defaults.keys(): + self.save_errors.append( + f"WARNING: {item} is not a known " + f"keyword in {os.path.basename(path)}." + ) + del rcdata[item] + else: + readinkeys.append(item) + diff = list(set(self.internal_defaults.keys()) - set(readinkeys)) + if diff: + self.save_errors.append( + "WARNING: These keywords were not found in the configuration " + "file {}\n and therefore default " + "values are taken for:".format(os.path.basename(path)) + ) + for item in diff: + self.save_errors.append(" {}".format(item)) + rcdata[item] = self.internal_defaults[item]["default"] + + for key in rcdata: + if rcdata[key] == "": + rcdata[key] = None + # -----> keys are checked, now check values!!!! 
def check_logic(self, error_logical=False, silent=False):
    """
    Check settings for impossible setting combinations, also checking
    if calculations are possible with the requested qm_codes.

    Settings are adjusted in place where a sensible replacement exists
    ("prog" placeholders, automatic basis sets, solvent models that are not
    available for the chosen program, automatic optlevel2 ...). Warnings and
    errors are appended to self.save_errors.

    error_logical -- initial error state, returned as True as soon as one
                     unresolvable setting is found.
    silent        -- if True, self.save_errors is restored to its content
                     from before the call, so no message of this check is
                     kept.

    Returns the (possibly updated) error_logical flag.
    """
    # snapshot (a copy!) of the messages, because every append below
    # mutates self.save_errors in place
    store_errors = list(self.save_errors) if silent else None

    def _package_default(key, fallback):
        """Default string of *key* for the chosen QM package, else *fallback*."""
        if self.prog == "tm":
            table = self.internal_defaults_tm
        elif self.prog == "orca":
            table = self.internal_defaults_orca
        else:
            return fallback
        entry = table.get(key)
        # entries are {"default": ..., "type": ...}; only the default is wanted
        value = entry.get("default") if isinstance(entry, dict) else entry
        if value in (None, "None", "automatic"):
            return fallback
        return value

    # if only one conformer!
    # if self.nconf == 1 and self.maxconf == 1:
    #     self.part1 = False
    #     self.part2 = True
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Handle prog3:
    if self.prog3 == "prog" and self.prog in self.value_options["prog"]:
        self.prog3 = self.prog
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Handle prog4_j:
    if self.prog4_j == "prog" and self.prog in self.value_options["prog"]:
        self.prog4_j = self.prog
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Handle prog4_s:
    if self.prog4_s == "prog" and self.prog in self.value_options["prog"]:
        self.prog4_s = self.prog
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # set spearmanthr by number of atoms:
    if self.spearmanthr < -1 or self.spearmanthr > 1:
        self.spearmanthr = 1 / (math.exp(0.03 * (self.nat ** (1 / 4))))
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Handle prog_rrho
    if self.prog_rrho == "prog" and self.prog in self.value_options["prog"]:
        self.prog_rrho = self.prog
    if self.prog_rrho == "tm":
        if shutil.which("thermo") is not None:
            # need thermo for reading thermostatistical contribution
            self.prog_rrho = "tm"
        else:
            self.prog_rrho = "xtb"
            self.save_errors.append(
                "WARNING: Currently are only GFNn-xTB "
                "hessians possible and no TM hessians"
            )
    elif not self.prog_rrho:
        self.save_errors.append(
            "WARNING: Thermostatistical contribution to "
            "free energy will not be calculated, since prog_rrho ist set to 'off'!"
        )
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Handle func3 dsd-blyp with basis
    if self.part3 and self.func3 == "dsd-blyp" and self.basis3 != "def2-TZVPP":
        self.save_errors.append(
            "WARNING: DSD-BLYP is only available with the basis set def2-TZVPP!"
        )
        self.basis3 = "def2-TZVPP"
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Handle func0
    if self.prog == "orca" and self.func0 not in self.func_orca:
        self.save_errors.append(
            "\nERROR: The functional "
            "(func0) {} is not implemented with the {} program package."
            " Options are: {}".format(self.func0, self.prog, self.func_orca)
        )
        error_logical = True
    if self.prog == "tm" and self.func0 not in self.func_tm:
        self.save_errors.append(
            "\nERROR: The functional "
            "(func0) {} is not implemented with the {} program package. "
            "Options are: {}".format(self.func0, self.prog, self.func_tm)
        )
        error_logical = True
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Handle basis0 for func0:
    if self.basis0 in ("None", None, "automatic"):
        self.basis0 = _package_default("basis0", "def2-SV(P)")
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Handle func
    if self.prog == "orca" and self.func not in self.func_orca:
        self.save_errors.append(
            "\nERROR: The functional "
            "(func) {} is not implemented with the {} program package."
            " Options are: {}".format(self.func, self.prog, self.func_orca)
        )
        error_logical = True
    if self.prog == "tm" and self.func not in self.func_tm:
        self.save_errors.append(
            "\nERROR: The functional "
            "(func) {} is not implemented with the {} program package. "
            "Options are: {}".format(self.func, self.prog, self.func_tm)
        )
        error_logical = True
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Handle basis for func:
    if self.basis in ("None", None, "automatic"):
        # functional-specific basis first, package default otherwise
        self.basis = self.func_basis_default.get(
            self.func, _package_default("basis", "def2-TZVP")
        )
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Handle func3
    if self.part3 and self.func3 in (
        "pbeh-3c",
        "b973c",
        "b97-3c",
        "hf3c",
        "hf-3c",
        "r2scan-3c",
    ):
        self.save_errors.append(
            "Basis set (basis3) is fixed to be "
            "def2-TZVPD, keep this in mind when using composite methods!"
        )
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if self.part4 and (self.couplings or self.shieldings):
        self.nmrmode = True
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Handle func_j and func_s: an unavailable functional is an error only
    # if part4 is actually requested, otherwise it is downgraded to a warning
    for label, func_attr, prog_attr, basis_attr in (
        ("funcJ", "func_j", "prog4_j", "basis_j"),
        ("funcS", "func_s", "prog4_s", "basis_s"),
    ):
        functional = getattr(self, func_attr)
        package = getattr(self, prog_attr)
        for qm_code in ("orca", "tm"):
            if package != qm_code:
                continue
            available = getattr(self, f"{func_attr}_{qm_code}")
            if functional in available:
                continue
            message = (
                "\nERROR: In part4 the functional ({}) {} "
                "is not implemented in ENSO with the {} program package. Options "
                "are: {}".format(label, functional, package, available)
            )
            if self.part4:
                self.save_errors.append(message)
                error_logical = True
            else:
                self.save_errors.append(message.replace("\nERROR", "WARNING", 1))
        if functional == "pbeh-3c":
            setattr(self, basis_attr, "def2-mSVP")
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # no unpaired electrons in coupling or shielding calculations!
    if self.unpaired > 0:
        if self.part4 and (self.couplings or self.shieldings):
            self.save_errors.append(
                "ERROR: Coupling and shift calculations "
                "(part4) are only available for closed-shell systems!"
            )
            error_logical = True
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Solvation:
    if self.solvent == "gas":
        self.smgsolv1 = "gas-phase"
        self.sm2 = "gas-phase"
        self.smgsolv2 = "gas-phase"
        self.smgsolv3 = "gas-phase"
        self.sm4_j = "gas-phase"
        self.sm4_s = "gas-phase"
    else:
        # counterpart of a solvent model in the other QM package
        exchange_sm = {
            "cosmo": "cpcm",
            "cpcm": "cosmo",
            "dcosmors": "smd",
            "smd": "dcosmors",
        }
        not_available = "WARNING: {} is not available with {}! Therefore {} is used!"
        # Handle sm2 --> solvent model in optimization:
        if self.sm2 not in self.impsm2:
            self.save_errors.append(
                f"ERROR: The solvent model {self.sm2} is not implemented!"
            )
            error_logical = True
        if self.prog == "orca":
            if self.sm2 in self.sm2_tm:
                self.save_errors.append(
                    not_available.format(self.sm2, self.prog, exchange_sm[self.sm2])
                )
                self.sm2 = exchange_sm[self.sm2]
            elif self.sm2 == "default":
                self.sm2 = self.internal_defaults_orca["sm2"]["default"]
        if self.prog == "tm":
            if self.sm2 in self.sm2_orca:
                self.save_errors.append(
                    not_available.format(self.sm2, self.prog, exchange_sm[self.sm2])
                )
                self.sm2 = exchange_sm[self.sm2]
            elif self.sm2 == "default":
                self.sm2 = self.internal_defaults_tm["sm2"]["default"]
        # Check if solvent-information is available for solventmodel
        ###
        # Check which solvation models are applied:
        check_for = {
            "xtb": False,
            "cosmors": False,
            "dcosmors": False,
            "cpcm": False,
            "smd": False,
            "DC": False,
        }
        applied_solventmodels = []
        if self.evaluate_rrho:
            applied_solventmodels.append(self.sm_rrho)
        if self.part1:
            applied_solventmodels.append(self.smgsolv1)
        if self.part2:
            applied_solventmodels.append(self.sm2)
            applied_solventmodels.append(self.smgsolv2)
        if self.part3:
            applied_solventmodels.append(self.smgsolv3)
        if self.part4:
            applied_solventmodels.append(self.sm4_j)
            applied_solventmodels.append(self.sm4_s)
        if self.optical_rotation:
            applied_solventmodels.append("cosmo")

        for solventmodel in set(applied_solventmodels):
            if solventmodel in ("alpb", "gbsa", "alpb_gsolv", "gbsa_gsolv"):
                check_for["xtb"] = True
            elif solventmodel in ("cosmors", "cosmors-fine"):
                check_for["cosmors"] = True
            elif solventmodel in ("dcosmors",):
                check_for["dcosmors"] = True
            elif solventmodel in ("cosmo",):
                check_for["DC"] = True
            elif solventmodel in ("cpcm",):
                check_for["cpcm"] = True
            elif solventmodel in ("smd", "smd_gsolv"):
                check_for["smd"] = True
            else:
                print("unexpected behaviour")
        # attribute holding the solvents known to each model/program
        lookup = {
            "xtb": "solvents_xtb",
            "cosmors": "solvents_cosmors",
            "dcosmors": "solvents_dcosmors",
            "cpcm": "solvents_cpcm",
            "smd": "solvents_smd",
            "DC": "",
        }
        # check if solvent in censo_solvent_db
        if censo_solvent_db.get(self.solvent, "not_found") == "not_found":
            self.save_errors.append(
                f"ERROR: The solvent {self.solvent} is not found!"
            )
            error_logical = True
            # nothing can be looked up for an unknown solvent
            solvent_data = None
        else:
            solvent_data = censo_solvent_db[self.solvent]
        if solvent_data is not None:
            for key, needed in check_for.items():
                if not needed:
                    continue
                entry = solvent_data.get(key, "nothing_found")
                if entry == "nothing_found":
                    self.save_errors.append(
                        f"ERROR: The solvent for solventmodel in {key} is not found!"
                    )
                    error_logical = True
                    # the sentinel must not be parsed as solvent data
                    continue
                if key == "DC":
                    try:
                        dc_is_valid = 0.0 < float(entry) < 150.0
                    except (ValueError, TypeError):
                        dc_is_valid = False
                    if not dc_is_valid:
                        self.save_errors.append(
                            "ERROR: The dielectric constant can not be converted."
                        )
                        error_logical = True
                elif key in ("smd", "cpcm"):
                    if entry[1].lower() not in getattr(self, lookup[key]):
                        self.save_errors.append(
                            f"WARNING: The solvent "
                            f"{entry[1]}"
                            f" for solventmodel/program {key} can not be checked but is used anyway."
                        )
                else:
                    if entry[1] not in getattr(self, lookup[key]):
                        self.save_errors.append(
                            f"WARNING: The solvent "
                            f"{entry[1]} "
                            f"for solventmodel/program {key} can not be checked but is used anyway."
                        )

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Handle smgsolv1, smgsolv2 and smgsolv3
        for name in ("smgsolv1", "smgsolv2", "smgsolv3"):
            if getattr(self, name) not in getattr(self, f"imp{name}"):
                self.save_errors.append(
                    f"ERROR: The solvent model {getattr(self, name)}"
                    f" is not implemented for {name} !"
                )
                error_logical = True
            if getattr(self, name) == "sm2":
                setattr(self, name, self.sm2)
            if self.prog == "tm" and getattr(self, name) in self.sm2_orca:
                replacement = exchange_sm[getattr(self, name)]
                self.save_errors.append(
                    not_available.format(getattr(self, name), self.prog, replacement)
                )
                setattr(self, name, replacement)
            if self.prog == "orca" and getattr(self, name) in self.sm2_tm:
                replacement = exchange_sm[getattr(self, name)]
                self.save_errors.append(
                    not_available.format(getattr(self, name), self.prog, replacement)
                )
                setattr(self, name, replacement)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Handle sm4_j and sm4_s
        for name, prog_attr in (("sm4_j", "prog4_j"), ("sm4_s", "prog4_s")):
            package = getattr(self, prog_attr)
            for qm_code, other_code, defaults in (
                ("orca", "tm", self.internal_defaults_orca),
                ("tm", "orca", self.internal_defaults_tm),
            ):
                if package != qm_code:
                    continue
                current = getattr(self, name)
                if current in getattr(self, f"{name}_{other_code}"):
                    self.save_errors.append(
                        not_available.format(current, package, exchange_sm[current])
                    )
                    setattr(self, name, exchange_sm[current])
                elif current == "default":
                    setattr(self, name, defaults[name]["default"])
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Handle optlevel2:
    # sm2 needs to be set (not default!)
    if self.optlevel2 in ("None", None, "automatic"):
        if self.sm2 in ("smd", "dcosmors") and self.solvent != "gas":
            self.optlevel2 = "lax"
        else:
            # gas phase
            self.optlevel2 = "normal"
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if self.part4 and not self.couplings and not self.shieldings:
        self.part4 = False
        self.save_errors.append(
            "WARNING: Neither calculating coupling nor "
            "shielding constants is activated! Part 4 is not executed."
        )
    elif not any(
        getattr(self, flag)
        for flag in ("h_active", "c_active", "f_active", "si_active", "p_active")
    ):
        if self.part4:
            self.save_errors.append(
                "WARNING: No type of NMR spectrum is "
                "activated in the .censorc! Therefore all nuclei are calculated!"
            )
            self.part4 = True
        else:
            self.save_errors.append(
                "WARNING: No type of NMR spectrum is activated in the .censorc!"
            )
    if silent:
        # drop everything this check added, keeping the list object itself
        self.save_errors[:] = store_errors
    return error_logical
+ ) + print(f"Reading conformer rotamer ensemble from: {self.ensemblepath}.") + if self.save_infos: + for _ in list(self.save_infos): + print(self.save_infos.pop(0)) + if self.save_errors: + print("") + for _ in list(self.save_errors): + print(self.save_errors.pop(0)) + info = [] + info.append(["justprint", "\n" + "".ljust(int(PLENGTH / 2), "-")]) + info.append(["justprint", "CRE SORTING SETTINGS".center(int(PLENGTH / 2), " ")]) + info.append(["justprint", "".ljust(int(PLENGTH / 2), "-") + "\n"]) + info.append(["nat", "number of atoms in system"]) + info.append(["nconf", "number of considered conformers"]) + info.append(["maxconf", "number of all conformers from input"]) + info.append(["charge", "charge"]) + info.append(["unpaired", "unpaired"]) + info.append(["solvent", "solvent"]) + info.append(["temperature", "temperature"]) + if self.multitemp: + info.append(["multitemp", "evalulate at different temperatures"]) + info.append( + [ + "printoption", + "temperature range", + [i for i in frange(self.trange[0], self.trange[1], self.trange[2])], + ] + ) + info.append(["evaluate_rrho", "calculate mRRHO contribution"]) + info.append(["consider_sym", "consider symmetry for mRRHO contribution"]) + info.append(["check", "cautious checking for error and failed calculations"]) + info.append(["crestcheck", "checking the DFT-ensemble using CREST"]) + info.append(["maxthreads", "maxthreads"]) + info.append(["omp", "omp"]) + + # PART0: + info.append(["justprint", "\n" + "".ljust(int(PLENGTH / 2), "-")]) + info.append( + [ + "justprint", + "CRE CHEAP-PRESCREENING - PART0".center(int(PLENGTH / 2), " "), + ] + ) + info.append(["justprint", "".ljust(int(PLENGTH / 2), "-")]) + info.append(["part0", "part0"]) + info.append(["nconf", "starting number of considered conformers"]) + info.append(["prog", "program for part0"]) + info.append(["func0", "functional for fast single-point"]) + info.append(["basis0", "basis set for fast single-point"]) + info.append(["part0_threshold", "threshold 
for sorting in part0"]) + + tmp_func_basis, _ = self.get_method_name( + "sp", func=getattr(self, "func0"), basis=getattr(self, "basis0"), disp="D3" + ) + info.append( + [ + "justprint", + f"\nshort-notation:\n{tmp_func_basis} " "// GFNn-xTB (Input geometry)", + ] + ) + + # PART1: + info.append(["justprint", "\n" + "".ljust(int(PLENGTH / 2), "-")]) + info.append( + ["justprint", "CRE PRESCREENING - PART1".center(int(PLENGTH / 2), " ")] + ) + info.append(["justprint", "".ljust(int(PLENGTH / 2), "-")]) + info.append(["part1", "part1"]) + info.append(["nconf", "starting number of considered conformers"]) + info.append(["prog", "program for part1"]) + info.append(["func", "functional for initial evaluation"]) + info.append(["basis", "basis set for initial evaluation"]) + info.append(["evaluate_rrho", "calculate mRRHO contribution"]) + if self.evaluate_rrho: + info.append(["prog_rrho", "program for mRRHO contribution"]) + if self.prog_rrho == "xtb" or self.smgsolv2 == "gbsa_gsolv": + info.append(["part1_gfnv", "GFN version for mRRHO and/or GBSA_Gsolv"]) + info.append( + [ + "bhess", + "Apply constraint to input geometry during mRRHO calculation", + ] + ) + info.append(["printoption", "evalulate at different temperatures", "off"]) + info.append(["part1_threshold", "threshold for sorting in part1"]) + if self.solvent != "gas": + info.append(["smgsolv1", "solvent model for Gsolv contribution of part1"]) + # shortnotation: + tmp_rrho_method, _ = self.get_method_name( + "rrhoxtb", + bhess=self.bhess, + gfn_version=self.part1_gfnv, + sm=self.sm_rrho, + solvent=self.solvent, + ) + tmp_func_basis, _ = self.get_method_name( + "sp", func=getattr(self, "func"), basis=getattr(self, "basis"), disp="D3" + ) + if self.solvent != "gas": + info.append( + [ + "justprint", + f"\nshort-notation:\n{tmp_func_basis} + " + f"{str(getattr(self, 'smgsolv1')).upper()}[{self.solvent}] " + f"+ GmRRHO({tmp_rrho_method}) " + f"// GFNn-xTB (Input geometry)", + ] + ) + else: + info.append( + [ + 
"justprint", + f"\nshort-notation:\n{tmp_func_basis} " + f"+ GmRRHO({str(getattr(self, 'part1_gfnv')).upper()}) " + "// GFNn-xTB (Input geometry)", + ] + ) + if self.part2: + # PART2: + info.append(["justprint", "\n" + "".ljust(int(PLENGTH / 2), "-")]) + info.append( + ["justprint", "CRE OPTIMIZATION - PART2".center(int(PLENGTH / 2), " ")] + ) + info.append(["justprint", "".ljust(int(PLENGTH / 2), "-")]) + info.append(["part2", "part2"]) + info.append(["prog", "program"]) + info.append(["func", "functional for part2"]) + info.append(["basis", "basis set for part2"]) + info.append(["ancopt", "using xTB-optimizer for optimization"]) + if self.opt_spearman: + info.append(["opt_spearman", "using the new ensemble optimizer"]) + info.append( + [ + "opt_limit", + "completely optimize all conformers below this threshold", + ] + ) + info.append(["printoption", "spearmanthr", f"{self.spearmanthr:.3f}"]) + if self.ancopt and self.optlevel2 is not None: + info.append(["optlevel2", "optimization level in part2"]) + if self.solvent != "gas": + info.append(["sm2", "solvent model applied in the optimization"]) + if self.smgsolv2 not in (None, "sm2"): + info.append(["smgsolv2", "solvent model for Gsolv contribution"]) + info.append(["multitemp", "evalulate at different temperatures"]) + info.append( + ["part2_threshold", "Boltzmann sum threshold for sorting in part2"] + ) + info.append(["evaluate_rrho", "calculate mRRHO contribution"]) + if self.evaluate_rrho: + info.append(["prog_rrho", "program for mRRHO contribution"]) + if self.prog_rrho == "xtb": + info.append( + ["part2_gfnv", "GFN version for mRRHO and/or GBSA_Gsolv"] + ) + if self.bhess: + info.append( + [ + "bhess", + "Apply constraint to input geometry " + "during mRRHO calculation", + ] + ) + # shortnotation: + tmp_rrho_method, _ = self.get_method_name( + "rrhoxtb", + bhess=self.bhess, + gfn_version=self.part2_gfnv, + sm=self.sm_rrho, + solvent=self.solvent, + ) + tmp_func_basis, _ = self.get_method_name( + "sp", 
func=getattr(self, "func"), basis=getattr(self, "basis") + ) + if self.solvent != "gas": + info.append( + [ + "justprint", + f"\nshort-notation:\n{tmp_func_basis} + " + f"{str(getattr(self, 'smgsolv2')).upper()}[{self.solvent}] " + f"+ GmRRHO({tmp_rrho_method}) // " + f"{tmp_func_basis}" + f"[{str(getattr(self, 'sm2')).upper()}] ", + ] + ) + else: + info.append( + [ + "justprint", + f"\nshort-notation:\n{tmp_func_basis} " + f"+ GmRRHO({str(getattr(self, 'part2_gfnv')).upper()}) " + f"// {tmp_func_basis}", + ] + ) + # PART3: + if self.part3: + info.append(["justprint", "\n" + "".ljust(int(PLENGTH / 2), "-")]) + info.append( + ["justprint", "CRE REFINEMENT - PART3".center(int(PLENGTH / 2), " ")] + ) + info.append(["justprint", "".ljust(int(PLENGTH / 2), "-")]) + info.append(["part3", "part3"]) + info.append(["part3_threshold", "Boltzmann sum threshold employed"]) + info.append(["prog3", "program for part3"]) + info.append(["func3", "functional for part3"]) + info.append(["basis3", "basis set for part3"]) + if self.solvent != "gas": + info.append(["smgsolv3", "solvent model"]) + info.append(["multitemp", "evalulate at different temperatures"]) + info.append(["prog_rrho", "program for mRRHO contribution"]) + if self.prog_rrho == "xtb": + info.append(["part3_gfnv", "GFN version for mRRHO and/or GBSA_Gsolv"]) + if self.bhess: + info.append( + [ + "bhess", + "Apply constraint to input geometry during mRRHO calculation", + ] + ) + # shortnotation: + tmp_rrho_method, _ = self.get_method_name( + "rrhoxtb", + bhess=self.bhess, + gfn_version=self.part3_gfnv, + sm=self.sm_rrho, + solvent=self.solvent, + ) + tmp_func3_basis3, _ = self.get_method_name( + "sp", + func=getattr(self, "func3"), + basis=getattr(self, "basis3"), + disp="D3", + ) + tmp_func_basis, _ = self.get_method_name( + "sp", func=getattr(self, "func"), basis=getattr(self, "basis") + ) + if self.solvent != "gas": + info.append( + [ + "justprint", + f"\nshort-notation:\n{tmp_func3_basis3} + " + f"{str(getattr(self, 
'smgsolv3')).upper()}[{self.solvent}] " + f"+ GmRRHO({tmp_rrho_method}) // " + f"{tmp_func_basis}" + f"[{str(getattr(self, 'sm2')).upper()}] ", + ] + ) + else: + info.append( + [ + "justprint", + f"\nshort-notation:\n{tmp_func3_basis3}" + f" + GmRRHO({str(getattr(self, 'part3_gfnv')).upper()}) " + f"// {tmp_func_basis}", + ] + ) + # NMR MODE + if self.nmrmode: + info.append(["justprint", "\n" + "".ljust(int(PLENGTH / 2), "-")]) + info.append( + ["justprint", " NMR MODE SETTINGS".center(int(PLENGTH / 2), " ")] + ) + info.append(["justprint", "".ljust(int(PLENGTH / 2), "-")]) + info.append(["part4", "part4"]) + info.append(["couplings", "calculate couplings (J)"]) + if self.couplings: + info.append(["prog4_j", "program for coupling calculations"]) + if self.solvent != "gas": + info.append(["sm4_j", "solvation model for coupling calculations"]) + info.append(["func_j", "functional for coupling calculation"]) + info.append(["basis_j", "basis set for coupling calculation"]) + info.append(["justprint", ""]) + info.append(["shieldings", "calculate shieldings (S)"]) + if self.shieldings: + info.append(["prog4_s", "program for shielding calculations"]) + if self.solvent != "gas": + info.append(["sm4_s", "solvation model for shielding calculations"]) + info.append(["func_s", "functional for shielding calculation"]) + info.append(["basis_s", "basis set for shielding calculation"]) + info.append(["justprint", ""]) + if getattr(self, "h_active"): + info.append(["h_active", "Calculating proton spectrum"]) + info.append(["h_ref", "reference for 1H"]) + if getattr(self, "c_active"): + info.append(["c_active", "Calculating carbon spectrum"]) + info.append(["c_ref", "reference for 13C"]) + if getattr(self, "f_active"): + info.append(["f_active", "Calculating fluorine spectrum"]) + info.append(["f_ref", "reference for 19F"]) + if getattr(self, "si_active"): + info.append(["si_active", "Calculating silicon spectrum"]) + info.append(["si_ref", "reference for 29Si"]) + if getattr(self, 
def read_program_paths(self, configpath):
    """
    Read the paths of the external programs from the configuration file.

    The file is scanned line by line and every recognised entry is stored
    in ``self.external_paths``. Scanning stops at the first line which
    contains ``$ENDPROGRAMS``. An entry that can not be parsed only
    produces a warning on stdout, it never raises.

    Args:
        configpath: path of the configuration file (.censorc) to read.
    """
    # entries of the form "<marker> <path>":
    # (marker, key in external_paths, name used in the warning,
    #  binary searched in PATH as fallback or None)
    simple_entries = (
        ("GFN-xTB:", "xtbpath", "GFNn-xTB", "xtb"),
        ("CREST:", "crestpath", "CREST", "crest"),
        ("mpshift:", "mpshiftpath", "mpshift", None),
        ("escf:", "escfpath", "escf", None),
    )
    with open(configpath, "r") as inp:
        stor = inp.readlines()
    paths = self.external_paths
    for line in stor:
        fields = line.split()
        if "ctd =" in line:
            paths["cosmorssetup"] = str(line.rstrip(os.linesep))
            try:
                if "fine" in paths["cosmorssetup"].lower():
                    tmpdb = "DATABASE-COSMO/BP-TZVPD-FINE"
                else:
                    tmpdb = "DATABASE-COSMO/BP-TZVP-COSMO"
                # the sixth token is the quoted value following "cdir =";
                # the database path is built next to that directory
                cdir = fields[5].strip('"')
                paths["dbpath"] = os.path.join(os.path.split(cdir)[0], tmpdb)
            except IndexError as e:
                print(e)
                print(
                    "WARNING: Could not read settings for COSMO-RS from "
                    ".censorc!\nMost probably there is a user "
                    "input error."
                )
        if "cosmothermversion:" in line:
            try:
                paths["cosmothermversion"] = int(fields[1])
            except (IndexError, ValueError):
                print(
                    "WARNING: Cosmothermversion could not be read! This "
                    "is necessary to prepare the cosmotherm.inp! "
                )
        if "ORCA:" in line:
            try:
                paths["orcapath"] = str(fields[1])
            except IndexError:
                print("WARNING: Could not read path for ORCA from .censorc!.")
        if "ORCA version:" in line:
            try:
                # only the first dot is kept, e.g. "4.2.1" is stored as "4.21"
                tmp = fields[2].split(".")
                tmp.insert(1, ".")
                paths["orcaversion"] = "".join(tmp)
            except IndexError:
                print("WARNING: Could not read ORCA version from .censorc!")
        for marker, key, name, binary in simple_entries:
            if marker not in line:
                continue
            try:
                paths[key] = str(fields[1])
            except IndexError:
                print(f"WARNING: Could not read path for {name} from .censorc!")
                # fall back to a binary found in PATH (xtb and crest only)
                found = shutil.which(binary) if binary is not None else None
                if found is not None:
                    paths[key] = found
                    print("Going to use {} instead.".format(found))
        if "$ENDPROGRAMS" in line:
            break


def needed_external_programs(self, config):
    """
    Automatically checks which external programs are required for the
    current run.

    Args:
        config: object providing the settings of the run as attributes
            (prog, prog3, prog4_j, prog4_s, prog_rrho, smgsolv1-3, part0,
            part4, ancopt, couplings, shieldings, run).

    Returns:
        dict which contains the key "need<program>" (and "startenso") with
        the value True for everything that is required; keys of programs
        which are not required are absent.
    """
    requirements = {}
    programs = (config.prog, config.prog3, config.prog4_j, config.prog4_s)
    gsolv_models = (config.smgsolv1, config.smgsolv2, config.smgsolv3)
    uses_cosmors = any(
        model in ("cosmors", "cosmors-fine") for model in gsolv_models
    )
    # xTB
    if (
        config.prog_rrho == "xtb"
        or config.part0
        or config.ancopt
        or config.smgsolv2 in ("gbsa_gsolv", "alpb_gsolv")
    ):
        requirements["needxtb"] = True
    # TM (COSMO-RS is set up from TM calculations as well)
    if "tm" in programs or uses_cosmors:
        requirements["needtm"] = True
        requirements["needcefine"] = True
    if config.part4 and "tm" in (config.prog4_j, config.prog4_s):
        if config.couplings:
            requirements["needescf"] = True
        if config.shieldings:
            requirements["needmpshift"] = True
    # COSMORS
    if uses_cosmors:
        requirements["needcosmors"] = True
    # ORCA
    if "orca" in programs or "smd_gsolv" in gsolv_models:
        requirements["needorca"] = True
    if config.run:
        requirements["startenso"] = True
    return requirements
def processQMpaths(self, requirements, error_logical):
    """
    Print the paths of the required external programs at startup and check
    that the programs can be found.

    Besides the checks, the number of cores (``self.omp``) is written to
    ENVIRON for xTB and TM calculations, the location of cefine is stored
    in ``self.external_paths["cefinepath"]`` and finally the module level
    ``external_paths`` is updated with ``self.external_paths``.

    Args:
        requirements: dict with the "need<program>"/"startenso" flags of
            the current run; a missing key counts as not required.
        error_logical: error flag determined so far.

    Returns:
        error_logical, set to True if one of the checks failed.

    Raises:
        Exception: a failure while preparing the parallel TM settings is
            re-raised if ``requirements["startenso"]`` is set.
    """
    paths = self.external_paths

    def _unusable(key):
        """Return True if the program stored under key can not be found."""
        return paths[key] is None or shutil.which(paths[key]) is None

    # print relevant Program paths:
    print("\n" + "".ljust(DIGILEN, "-"))
    print("PATHS of external QM programs".center(DIGILEN, " "))
    print("".ljust(DIGILEN, "-") + "\n")
    print("The following program paths are used:")
    if requirements.get("needorca", False):
        print(" ORCA: {}".format(paths["orcapath"]))
        print(" ORCA Version: {}".format(paths["orcaversion"]))
    if requirements.get("needxtb", False):
        print(" xTB: {}".format(paths["xtbpath"]))
    if requirements.get("needcrest", False):
        print(" CREST: {}".format(paths["crestpath"]))
    if requirements.get("needtm", False):
        tmpath = shutil.which("ridft")
        tmpath = os.path.dirname(tmpath) if tmpath is not None else "None"
        print(" TURBOMOLE: {}".format(tmpath))
    if requirements.get("needescf", False):
        print(" escf: {}".format(paths["escfpath"]))
    if requirements.get("needmpshift", False):
        print(" mpshift: {}".format(paths["mpshiftpath"]))
    if requirements.get("needcosmors", False):
        try:
            tmp = paths["cosmorssetup"].split()
        except AttributeError:
            # setup is not a string (e.g. None): print it unformatted below
            tmp = []
        if len(tmp) == 9:
            print(" Setup of COSMO-RS:")
            print(" {}".format(" ".join(tmp[0:3])))
            print(" {}".format(" ".join(tmp[3:6])))
            print(" {}".format(" ".join(tmp[6:9])))
        else:
            print(f" Setup of COSMO-RS: {str(paths['cosmorssetup'])}")
        print(
            f" Using {paths['dbpath']}\n"
            " as path to the COSMO-RS DATABASE."
        )
    print("")
    # Check if paths of needed programs exist:
    if requirements.get("needcrest", False) and _unusable("crestpath"):
        print("ERROR: path for CREST is not correct!")
        error_logical = True
    # xTB
    if requirements.get("needxtb", False):
        if _unusable("xtbpath"):
            print("ERROR: path for xTB is not correct!")
            error_logical = True
        try:
            ENVIRON["OMP_NUM_THREADS"] = "{:d}".format(self.omp)
        except (TypeError, ValueError):
            print("ERROR: can not set omp for xTB calculation!")
    # ORCA
    if requirements.get("needorca", False):
        if (
            paths["orcapath"] is None
            or shutil.which(os.path.join(paths["orcapath"], "orca")) is None
        ):
            print("ERROR: path for ORCA is not correct!")
            error_logical = True
    # cefine
    if requirements.get("needcefine", False):
        path_to_cefine = None
        if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
            # running in a PyInstaller bundle: prefer the bundled cefine
            bundle_dir = getattr(
                sys, "_MEIPASS", os.path.abspath(os.path.dirname(__file__))
            )
            path_to_cefine = os.path.abspath(os.path.join(bundle_dir, "cefine"))
            if not os.path.exists(path_to_cefine):
                path_to_cefine = None
        if path_to_cefine is None:
            path_to_cefine = shutil.which("cefine")

        # shutil.which returns None if cefine is not in PATH and
        # os.path.exists(None) raises a TypeError, hence the explicit check
        if path_to_cefine is not None and os.path.exists(path_to_cefine):
            print(" Using cefine from {}".format(path_to_cefine))
            paths["cefinepath"] = path_to_cefine
        else:
            print(
                "ERROR: cefine (the commandline program for define) has not been found!"
            )
            print(f"{'':{7}}all programs needing TM can not start!")
            error_logical = True
    # TM
    if requirements.get("needtm", False):
        # preparation of parallel calculation with TM
        try:
            if ENVIRON.get("PARA_ARCH", None) == "SMP":
                try:
                    ENVIRON["PARNODES"] = str(self.omp)
                    ENVIRON["OMP_NUM_THREADS"] = "{:d}".format(self.omp)
                    print(
                        " PARNODES for TM or COSMO-RS calculation was set "
                        "to {}".format(ENVIRON["PARNODES"])
                    )
                except Exception:
                    print("ERROR: PARNODES can not be changed!")
                    error_logical = True
                    raise
            else:
                print(
                    "ERROR: PARA_ARCH has to be set to SMP for parallel TM "
                    "calculations!"
                )
                if self.run:
                    error_logical = True
        except Exception:
            print(
                "ERROR: PARA_ARCH has to be set to SMP and PARNODES have to "
                "be set\n for parallel TM calculations!."
            )
            if requirements.get("startenso", False):
                error_logical = True
                raise
    if requirements.get("needescf", False) and _unusable("escfpath"):
        print("ERROR: path for escf is not correct!")
        error_logical = True
    if requirements.get("needmpshift", False) and _unusable("mpshiftpath"):
        print("ERROR: path for mpshift is not correct!")
        error_logical = True
    # COSMORS
    if requirements.get("needcosmors", False):
        if paths["cosmorssetup"] is None:
            print("ERROR: Set up for COSMO-RS has to be written to .censorc!")
            error_logical = True
        if paths["cosmothermversion"] is None:
            print("ERROR: Version of COSMO-RS has to be written to .censorc!")
            error_logical = True
        cosmotherm = shutil.which("cosmotherm")
        if cosmotherm is not None:
            print(" Using COSMOtherm from {}".format(cosmotherm))
        else:
            print("ERROR: COSMOtherm has not been found!")
            error_logical = True
    # update cfg.external paths
    external_paths.update(self.external_paths)
    return error_logical
pathtofile): + """ + write new global configruation file into the current directroy. + """ + args_key = {v: k for k, v in self.key_args_dict.items()} + with open(pathtofile, "w", newline=None) as outdata: + outdata.write("$CENSO global configuration file: .censorc\n") + outdata.write(f"$VERSION:{__version__} \n") + outdata.write("\n") + outdata.write("ORCA: /path/excluding/binary/\n") + outdata.write("ORCA version: 4.2.1\n") + outdata.write("GFN-xTB: /path/including/binary/xtb-binary\n") + outdata.write("CREST: /path/including/binary/crest-binary\n") + outdata.write("mpshift: /path/including/binary/mpshift-binary\n") + outdata.write("escf: /path/including/binary/escf-binary\n") + outdata.write("\n") + outdata.write("#COSMO-RS\n") + outdata.write( + "ctd = BP_TZVP_C30_1601.ctd cdir = " + '"/software/cluster/COSMOthermX16/COSMOtherm/CTDATA-FILES" ldir = ' + '"/software/cluster/COSMOthermX16/COSMOtherm/CTDATA-FILES"\n' + ) + outdata.write("cosmothermversion: 16\n") + outdata.write("$ENDPROGRAMS\n\n") + outdata.write("$CRE SORTING SETTINGS:\n") + outdata.write("$GENERAL SETTINGS:\n") + for key in OrderedDict(self.defaults_refine_ensemble_general): + value = self._exchange_onoff( + OrderedDict(self.defaults_refine_ensemble_general)[key]["default"], + reverse=True, + ) + options = self.value_options.get(key, "possibilities") + if key == "nconf" and value is None: + value = "all" + key = args_key.get(key, key) + outdata.write(format_line(key, value, options)) + outdata.write("\n$PART0 - CHEAP-PRESCREENING - SETTINGS:\n") + for key in OrderedDict(self.defaults_refine_ensemble_part0): + value = self._exchange_onoff( + OrderedDict(self.defaults_refine_ensemble_part0)[key]["default"], + reverse=True, + ) + options = self.value_options.get(key, "possibilities") + key = args_key.get(key, key) + outdata.write(format_line(key, value, options)) + outdata.write("\n$PART1 - PRESCREENING - SETTINGS:\n") + outdata.write("# func and basis is set under GENERAL SETTINGS\n") + for key in 
OrderedDict(self.defaults_refine_ensemble_part1): + value = self._exchange_onoff( + OrderedDict(self.defaults_refine_ensemble_part1)[key]["default"], + reverse=True, + ) + options = self.value_options.get(key, "possibilities") + key = args_key.get(key, key) + outdata.write(format_line(key, value, options)) + outdata.write("\n$PART2 - OPTIMIZATION - SETTINGS:\n") + outdata.write("# func and basis is set under GENERAL SETTINGS\n") + for key in OrderedDict(self.defaults_refine_ensemble_part2): + value = self._exchange_onoff( + OrderedDict(self.defaults_refine_ensemble_part2)[key]["default"], + reverse=True, + ) + options = self.value_options.get(key, "possibilities") + key = args_key.get(key, key) + outdata.write(format_line(key, value, options)) + outdata.write("\n$PART3 - REFINEMENT - SETTINGS:\n") + for key in OrderedDict(self.defaults_refine_ensemble_part3): + value = self._exchange_onoff( + OrderedDict(self.defaults_refine_ensemble_part3)[key]["default"], + reverse=True, + ) + options = self.value_options.get(key, "possibilities") + key = args_key.get(key, key) + outdata.write(format_line(key, value, options)) + outdata.write("\n$NMR PROPERTY SETTINGS:\n") + outdata.write("$PART4 SETTINGS:\n") + for key in OrderedDict(self.defaults_nmrprop_part4): + value = self._exchange_onoff( + OrderedDict(self.defaults_nmrprop_part4)[key]["default"], + reverse=True, + ) + options = self.value_options.get(key, "possibilities") + key = args_key.get(key, key) + outdata.write(format_line(key, value, options)) + outdata.write("\n$OPTICAL ROTATION PROPERTY SETTINGS:\n") + outdata.write("$PART5 SETTINGS:\n") + for key in OrderedDict(self.defaults_optical_rotation_part5): + value = self._exchange_onoff( + OrderedDict(self.defaults_optical_rotation_part5)[key]["default"], + reverse=True, + ) + options = self.value_options.get(key, "possibilities") + key = args_key.get(key, key) + outdata.write(format_line(key, value, options)) + outdata.write("$END CENSORC\n") + + def 
def read_json(self, path, silent=False):
    """
    Reading stored data on conformers and information on settings of
    previous run.

    Args:
        path: path of the json file.
        silent: if True the name of the file which is read is not printed.

    Returns:
        The content of the file as OrderedDict, or None if path is not an
        existing file.

    Raises:
        ValueError, TypeError, FileNotFoundError: re-raised after a
            message was printed if the file can not be read or decoded.
    """
    if not os.path.isfile(path):
        # nothing stored; return explicitly instead of running into an
        # unbound local variable
        return None
    if not silent:
        print("Reading file: {}\n".format(os.path.basename(path)))
    try:
        with open(path, "r", encoding=CODING, newline=None) as inp:
            return json.load(inp, object_pairs_hook=OrderedDict)
    except (ValueError, TypeError, FileNotFoundError):
        print("Your Jsonfile (enso.json) is corrupted!\n")
        time.sleep(0.02)
        raise


def write_json(self, path, conformers, settings, outfile="enso.json"):
    """
    Dump conformer data and settings information of current run to json file

    Args:
        path: directory in which the file is written.
        conformers: list of conformers, each either an OrderedDict with
            the key "id" or an object with the attribute id. The list is
            sorted in place by id if the entries can be indexed by "id".
        settings: OrderedDict or object whose attributes are stored.
        outfile: name of the json file.
    """
    data = {}
    if isinstance(settings, OrderedDict):
        data["settings"] = settings
    else:
        data["settings"] = vars(settings)
    try:
        conformers.sort(key=lambda x: int(x["id"]))
    except (AttributeError, KeyError, TypeError, ValueError):
        # best effort only: entries which can not be indexed by "id"
        # (e.g. conformer objects) are written in the given order
        pass
    for conf in conformers:
        if isinstance(conf, OrderedDict):
            data[conf["id"]] = conf
        else:
            data[conf.id] = vars(conf)
    with open(os.path.join(path, outfile), "w") as out:
        json.dump(data, out, indent=4, sort_keys=False)
def read_exp_ref(path):
    """
    read experimental reference shifts

    The file ".ref" in the directory path is read. The first line is
    skipped, every following line is expected to contain an integer
    followed by a float. Lines which do not match this pattern (empty or
    whitespace-only lines, a missing value, non-numeric entries) are
    ignored.

    Returns:
        dict mapping the integer of each line to its float value.
    """
    with open(os.path.join(path, ".ref"), "r") as inp:
        data = inp.readlines()
    expref = {}
    for line in data[1:]:
        fields = line.split()
        if len(fields) < 2:
            # blank or incomplete line, nothing to read
            continue
        try:
            expref[int(fields[0])] = float(fields[1])
        except ValueError:
            pass
    return expref


def get_atom(path):
    """
    read coord

    The file "coord" in the directory path is read. The first line is
    skipped and reading stops at the next line containing "$" (e.g. $end).

    Returns:
        dict mapping the number of the atom (counted from 1 in the order
        of the file) to the lower-cased fourth column of its line.
    """
    with open(os.path.join(path, "coord"), "r") as inp:
        data = inp.readlines()
    element = {}
    for number, line in enumerate(data[1:], start=1):
        if "$" in line:  # stop at $end ...
            break
        element[number] = str(line.split()[3].lower())
    return element
str(conf.id), "NMR") + ) + ) + for atom in conf.shieldings.keys(): + sigma = sum( + [conf.shieldings.get(eq_atom, 0.0) for eq_atom in chemeq[atom]] + ) / len(chemeq[atom]) + sigma_std_dev[atom].append( + conf.bm_weight * sigma + averaged.get(atom, 0.0) + ) + + print("\nAveraged shielding constants:") + print("# in coord element σ(sigma) SD(σ based on SD Gsolv) shift") + print("".ljust(int(70), "-")) + maxsigma = max([len(str(sigma).split(".")[0]) for sigma in averaged.values()]) + 5 + make_shift = ( + lambda atom: f"{-sigma+element_ref_shield.get(element[atom], 0.0):> {maxsigma}.2f}" + if (element_ref_shield.get(element[atom], None) is not None) + else "None" + ) + for atom, sigma in averaged.items(): + try: + std_dev = calc_std_dev(sigma_std_dev[atom]) + except Exception as e: + print(e) + std_dev = 0.0 + try: + print( + f"{atom:< {10}} {element[atom]:^{7}} {sigma:> {maxsigma}.2f} " + f"{std_dev:^ 24.6f} {make_shift(atom):>5}" + ) + except: + print(f"{atom:< {10}} {element[atom]:^{7}} {sigma:> {maxsigma}.2f}") + print("".ljust(int(70), "-")) + + +def part4(config, conformers, store_confs, ensembledata): + """ + Calculate nmr properties: shielding and coupling constants on the populated + conformers (either directly from part2 OPTIMIZATION or after REFINEMENT + (part3)) + """ + save_errors = [] + print("\n" + "".ljust(PLENGTH, "-")) + print("NMR MODE - PART4".center(PLENGTH, " ")) + print("".ljust(PLENGTH, "-") + "\n") + # print flags for part3 + info = [] + info.append(["couplings", "calculate coupling constants"]) + if config.couplings: + info.append(["prog4_j", "prog4J - program for coupling constant calculation"]) + info.append(["func_j", "funcJ - functional for coupling constant calculation"]) + info.append(["basis_j", "basisJ - basis for coupling constant calculation"]) + if config.solvent != "gas": + info.append(["sm4_j", "sm4J - solvent model for the coupling calculation"]) + info.append(["shieldings", "calculate shielding constants σ"]) + if 
def _print_part4_settings(config, info):
    """Print the 'description: value' table for the settings listed in *info*."""
    optionsexchange = {True: "on", False: "off"}
    for item in info:
        if item[0] == "justprint":
            print(item[1:][0])
            continue
        if item[0] == "printoption":
            option = item[2]
        else:
            option = getattr(config, item[0])
        if option is True or option is False:
            option = optionsexchange[option]
        elif isinstance(option, list):
            option = [str(i) for i in option]
            if len(str(option)) > 40:
                # abbreviate long lists
                length = 0
                reduced = []
                for i in option:
                    length += len(i) + 2
                    if length < 40:
                        reduced.append(i)
                reduced.append("...")
                option = reduced
            option = ", ".join(option)
        print(
            "{}: {:{digits}} {}".format(
                item[1], "", option, digits=DIGILEN - len(item[1])
            )
        )
    print("")


def _store_nmr_result(result_info, instruction, status):
    """Store status, method name and active-nuclei flags of a job in *result_info*."""
    result_info["info"] = status
    result_info["method"] = instruction["method"]
    for key in ("h_active", "c_active", "f_active", "si_active", "p_active"):
        result_info[key] = instruction[key]


def _part4_save(config, calculate, prev_calculated, store_confs, ensembledata):
    """Write the current state of all conformer lists to the json file."""
    config.write_json(
        config.cwd,
        [i.provide_runinfo() for i in calculate]
        + [i.provide_runinfo() for i in prev_calculated]
        + [i.provide_runinfo() for i in store_confs]
        + [ensembledata],
        config.provide_runinfo(),
    )


def _part4_exit_no_job(prog):
    """Terminate: no job class is available in part4 for the program *prog*."""
    print(f"ERROR: No job class available in part4 for program '{prog}'!")
    print("Going to exit!")
    sys.exit(1)


def part4(config, conformers, store_confs, ensembledata):
    """
    Calculate nmr properties: shielding and coupling constants on the populated
    conformers (either directly from part2 OPTIMIZATION or after REFINEMENT
    (part3))

    Outline:
      1. print the part4 settings,
      2. keep conformers that passed part2 and have energy (and, if requested,
         mRRHO / Gsolv) contributions at the part2 or part3 level,
      3. calculate free energies and Boltzmann weights, print/write
         ``part4.dat`` and drop conformers beyond the Boltzmann threshold,
      4. create the ``NMR`` folders, copy the optimized ``coord`` and run the
         coupling and/or shielding calculations,
      5. write ``anmr_enso`` and ``.anmrrc``, generate the generic output and
         print the averaged shielding constants.

    Args:
        config: run configuration object.
        conformers: list of conformer objects; processed entries are removed
            from this list.
        store_confs: list collecting conformers that are sorted out
            (``None`` is treated as an empty list).
        ensembledata: ensemble object, only written to the json file here.

    Returns:
        tuple ``(config, calculate, store_confs, ensembledata)`` where
        *calculate* holds the conformers with NMR data.

    Exits with status 1 if no conformers are left.
    """
    save_errors = []
    print("\n" + "".ljust(PLENGTH, "-"))
    print("NMR MODE - PART4".center(PLENGTH, " "))
    print("".ljust(PLENGTH, "-") + "\n")
    # print flags for part4
    info = []
    info.append(["couplings", "calculate coupling constants"])
    if config.couplings:
        info.append(["prog4_j", "prog4J - program for coupling constant calculation"])
        info.append(["func_j", "funcJ - functional for coupling constant calculation"])
        info.append(["basis_j", "basisJ - basis for coupling constant calculation"])
        if config.solvent != "gas":
            info.append(["sm4_j", "sm4J - solvent model for the coupling calculation"])
    info.append(["shieldings", "calculate shielding constants σ"])
    if config.shieldings:
        info.append(["prog4_s", "prog4S - program for shielding constant calculation"])
        info.append(["func_s", "funcS - functional for shielding constant calculation"])
        info.append(["basis_s", "basisS - basis for shielding constant calculation"])
        if config.solvent != "gas":
            info.append(["sm4_s", "sm4S - solvent model for the shielding calculation"])
    info.append(["resonance_frequency", "spectrometer frequency"])
    _print_part4_settings(config, info)
    # end print

    calculate = []  # has to be calculated in this run
    prev_calculated = []  # was already calculated in a previous run
    if store_confs is None:
        store_confs = []  # stores all confs which are sorted out!

    # setup queues
    q = Queue()
    resultq = Queue()

    # level of theory the Boltzmann weights are taken from
    if config.part3:
        # calc boltzmann weights from part3
        energy = "highlevel_sp_info"
        rrho = "highlevel_grrho_info"
        gsolv = "highlevel_gsolv_info"
        boltzmannthr = config.part3_threshold
        func, basis = config.func3, config.basis3
        smgsolv, gfnv = config.smgsolv3, config.part3_gfnv
    else:
        # part3 is not calculated use boltzmann weights directly from part2
        energy = "lowlevel_sp_info"
        rrho = "lowlevel_grrho_info"
        gsolv = "lowlevel_gsolv_info"
        boltzmannthr = config.part2_threshold
        func, basis = config.func, config.basis
        smgsolv, gfnv = config.smgsolv2, config.part2_gfnv

    # sort conformers:
    for conf in list(conformers):
        if conf.removed:
            store_confs.append(conformers.pop(conformers.index(conf)))
            print(f"CONF{conf.id} is removed as requested by the user!")
            continue
        if (
            conf.part_info["part2"] == "passed"
            and conf.optimization_info["info"] == "calculated"
        ):
            mol = conformers.pop(conformers.index(conf))
            if getattr(conf, energy)["info"] != "calculated":
                store_confs.append(mol)
            elif getattr(conf, rrho)["info"] != "calculated" and config.evaluate_rrho:
                store_confs.append(mol)
            elif (
                getattr(conf, gsolv)["info"] != "calculated" and config.solvent != "gas"
            ):
                store_confs.append(mol)
            else:
                calculate.append(mol)
        else:
            print(
                f"WARNING: CONF{conf.id} has not been optimized (part2)! "
                f"Removing CONF{conf.id}"
            )
            conf = conformers.pop(conformers.index(conf))
            store_confs.append(conf)

    if not calculate and not prev_calculated:
        print("ERROR: No conformers left!")
        print("Going to exit!")
        sys.exit(1)

    calculate.sort(key=lambda x: int(x.id))
    print("Considering the following conformers:")
    print_block(["CONF" + str(i.id) for i in calculate])

    # Calculate boltzmann weight for confs:
    # method names default to "" so the table can be built for gas phase
    # and/or without mRRHO as well
    rrho_method = ""
    solv_method = ""
    if not config.evaluate_rrho:
        rrho = None
    else:
        rrho_method, _ = config.get_method_name(
            "rrhoxtb",
            bhess=config.bhess,
            gfn_version=gfnv,
            sm=config.sm_rrho,
            solvent=config.solvent,
        )
    if config.solvent == "gas":
        gsolv = None
        energy_method, _ = config.get_method_name(
            "xtbopt",
            func=func,
            basis=basis,
            sm=smgsolv,
            gfn_version=gfnv,
            solvent=config.solvent,
        )
    else:
        if smgsolv in ("cosmors", "cosmors-fine"):
            tmp_name = "cosmors"
        elif smgsolv in ("alpb_gsolv", "gbsa_gsolv", "smd_gsolv"):
            tmp_name = smgsolv
        else:
            tmp_name = "sp_implicit"
        energy_method, solv_method = config.get_method_name(
            tmp_name,
            func=func,
            basis=basis,
            sm=smgsolv,
            gfn_version=gfnv,
            solvent=config.solvent,
        )

    for conf in calculate:
        conf.calc_free_energy(e=energy, solv=gsolv, rrho=rrho)
    calculate = calc_boltzmannweights(calculate, "free_energy", config.temperature)
    minfree = min([i.free_energy for i in calculate if i is not None])
    for conf in calculate:
        conf.rel_free_energy = (conf.free_energy - minfree) * AU2KCAL
    calculate.sort(key=lambda x: int(x.id))

    # printout for part4 -------------------------------------------------------
    print("\n" + "".ljust(int(PLENGTH / 2), "-"))
    print("* Gibbs free energies used in part4 *".center(int(PLENGTH / 2), " "))
    print("".ljust(int(PLENGTH / 2), "-") + "\n")
    columncall = [
        lambda conf: "CONF" + str(getattr(conf, "id")),
        lambda conf: getattr(conf, energy)["energy"],
        lambda conf: getattr(conf, gsolv)["energy"],
        lambda conf: getattr(conf, rrho)["energy"],
        lambda conf: getattr(conf, "free_energy"),
        lambda conf: getattr(conf, "rel_free_energy"),
        lambda conf: getattr(conf, "bm_weight") * 100,
    ]
    columnheader = [
        "CONF#",
        "E [Eh]",
        "Gsolv [Eh]",
        "GmRRHO [Eh]",
        "Gtot",
        "ΔGtot",
        "Boltzmannweight",
    ]
    columndescription = [
        "",
        energy_method,
        solv_method,
        rrho_method,
        "[Eh]",
        "[kcal/mol]",
        f" % at {config.temperature:.2f} K",
    ]
    columnformat = ["", (12, 7), (12, 7), (12, 7), (12, 7), (5, 2), (5, 2)]
    all_columns = (columncall, columnheader, columndescription, columnformat)
    if not config.evaluate_rrho:
        # ignore rrho in printout
        for column in all_columns:
            column.pop(3)
    if config.solvent == "gas":
        # ignore gsolv in printout
        for column in all_columns:
            column.pop(2)

    printout(
        os.path.join(config.cwd, "part4.dat"),
        columncall,
        columnheader,
        columndescription,
        columnformat,
        calculate,
        minfree,
    )
    calculate.sort(reverse=True, key=lambda x: float(x.bm_weight))
    sumup = 0.0
    for conf in list(calculate):
        sumup += conf.bm_weight
        if sumup >= boltzmannthr:
            if conf.bm_weight < (1 - boltzmannthr):
                store_confs.append(calculate.pop(calculate.index(conf)))
    print(f"\nConformers that are below the Boltzmann-thr of {boltzmannthr}:")
    print_block(["CONF" + str(i.id) for i in calculate])

    # create NMR folder
    folder = "NMR"
    save_errors, store_confs, calculate = new_folders(
        config.cwd, calculate, folder, save_errors, store_confs
    )
    # need to copy optimized coord to folder
    for conf in list(calculate):
        tmp1 = os.path.join(config.cwd, "CONF" + str(conf.id), config.func, "coord")
        # destination is anchored at config.cwd like the source and the folders
        tmp2 = os.path.join(config.cwd, "CONF" + str(conf.id), folder, "coord")
        try:
            shutil.copy(tmp1, tmp2)
        except FileNotFoundError:
            print("ERROR can't copy optimized geometry!")
            store_confs.append(calculate.pop(calculate.index(conf)))

    job = None  # job class of the last configured property calculation
    check = {True: "was successful", False: "FAILED"}
    pl = config.lenconfx + 4 + len(str("/" + folder))

    if config.couplings:
        print("\nPerforming coupling constant calculations:")
        # check if J calculated before!
        for conf in list(calculate):
            status = getattr(conf, "nmr_coupling_info")["info"]
            if status == "calculated":
                prev_calculated.append(calculate.pop(calculate.index(conf)))
            elif status == "failed":
                store_confs.append(calculate.pop(calculate.index(conf)))

        if not calculate + prev_calculated:
            print("ERROR: No conformers left!")
            print("Going to exit!")
            sys.exit(1)

        instruction_j = {
            "jobtype": "couplings_sp",
            "prepinfo": ["high+"],
            "func": config.func_j,
            "basis": config.basis_j,
            "charge": config.charge,
            "unpaired": config.unpaired,
            "solvent": config.solvent,
            "sm": config.sm4_j,
            "success": False,
            "omp": config.omp,
            # nmractive nuclei
            "h_active": config.h_active,
            "c_active": config.c_active,
            "f_active": config.f_active,
            "p_active": config.p_active,
            "si_active": config.si_active,
        }
        if config.prog4_j == "orca":
            job = OrcaJob
            instruction_j["progpath"] = config.external_paths["orcapath"]
        elif config.prog4_j == "tm":
            job = TmJob
            instruction_j["progpath"] = config.external_paths["escfpath"]
            # escf no mgrid!!!!
        if config.prog4_j in ("orca", "tm", "adf"):
            instruction_j["method"], _ = config.get_method_name(
                instruction_j["jobtype"],
                func=instruction_j["func"],
                basis=instruction_j["basis"],
                sm=instruction_j["sm"],
                solvent=instruction_j["solvent"],
                prog=config.prog4_j,
            )
        if calculate:
            if job is None:
                _part4_exit_no_job(config.prog4_j)
            calculate = run_in_parallel(
                config,
                q,
                resultq,
                job,
                config.maxthreads,
                calculate,
                instruction_j,
                folder,
            )
            for conf in list(calculate):
                line = (
                    f"Coupling constant calculation {check[conf.job['success']]}"
                    f" for {last_folders(conf.job['workdir'], 2):>{pl}}: "
                )
                print(line)
                if not conf.job["success"]:
                    save_errors.append(line)
                    _store_nmr_result(conf.nmr_coupling_info, instruction_j, "failed")
                    conf.part_info["part4"] = "refused"
                    store_confs.append(calculate.pop(calculate.index(conf)))
                else:
                    _store_nmr_result(
                        conf.nmr_coupling_info, instruction_j, "calculated"
                    )
            # save current data to jsonfile
            _part4_save(config, calculate, prev_calculated, store_confs, ensembledata)

        if prev_calculated:
            for conf in list(prev_calculated):
                conf.job["workdir"] = os.path.normpath(
                    os.path.join(config.cwd, "CONF" + str(conf.id), folder)
                )
                line = (
                    f"Coupling constant calculation {check[True]}"
                    f" for {last_folders(conf.job['workdir'], 2):>{pl}}: "
                )
                print(line)
                calculate.append(prev_calculated.pop(prev_calculated.index(conf)))

        for conf in calculate:
            conf.reset_job_info()
        if not calculate:
            print("ERROR: No conformers left!")
            print("Going to exit!")
            sys.exit(1)

    if config.shieldings:
        print("\nPerforming shielding constant calculations:")
        # start shielding constants
        # check if S calculated before!
        for conf in list(calculate):
            status = getattr(conf, "nmr_shielding_info")["info"]
            if status == "calculated":
                prev_calculated.append(calculate.pop(calculate.index(conf)))
            elif status == "failed":
                store_confs.append(calculate.pop(calculate.index(conf)))
        for conf in calculate:
            conf.reset_job_info()
        if not calculate + prev_calculated:
            print("ERROR: No conformers left!")
            print("Going to exit!")
            sys.exit(1)

        instruction_s = {
            "jobtype": "shieldings_sp",
            "prepinfo": ["high+"],
            "func": config.func_s,
            "basis": config.basis_s,
            "charge": config.charge,
            "unpaired": config.unpaired,
            "solvent": config.solvent,
            "sm": config.sm4_s,
            "success": False,
            "omp": config.omp,
            # nmractive nuclei
            "h_active": config.h_active,
            "c_active": config.c_active,
            "f_active": config.f_active,
            "p_active": config.p_active,
            "si_active": config.si_active,
        }

        # A new single-point (prepinfo "high+", jobtype "shieldings_sp") is the
        # default. Only if couplings were calculated with the same program and
        # basis set the previous calculation can be reused.
        if (
            config.couplings
            and config.basis_j == config.basis_s
            and config.prog4_j == config.prog4_s
        ):
            if config.func_j == config.func_s:
                instruction_s["prepinfo"] = []
                # don't do single-point
                instruction_s["jobtype"] = "shieldings"
            elif config.prog4_s == "tm":
                # use already converged mos as start mos
                instruction_s["copymos"] = "mos_j"

        if config.prog4_s == "orca":
            job = OrcaJob
            instruction_s["progpath"] = config.external_paths["orcapath"]
        elif config.prog4_s == "tm":
            job = TmJob
            instruction_s["progpath"] = config.external_paths["mpshiftpath"]
        if config.prog4_s in ("orca", "tm", "adf"):
            # the method name has to be built for the shielding program
            instruction_s["method"], _ = config.get_method_name(
                instruction_s["jobtype"],
                func=instruction_s["func"],
                basis=instruction_s["basis"],
                sm=instruction_s["sm"],
                solvent=instruction_s["solvent"],
                prog=config.prog4_s,
            )
        if calculate:
            if job is None:
                _part4_exit_no_job(config.prog4_s)
            calculate = run_in_parallel(
                config,
                q,
                resultq,
                job,
                config.maxthreads,
                calculate,
                instruction_s,
                folder,
            )
            for conf in list(calculate):
                line = (
                    f"Shielding constant calculation {check[conf.job['success']]}"
                    f" for {last_folders(conf.job['workdir'], 2):>{pl}}: "
                )
                print(line)
                if not conf.job["success"]:
                    save_errors.append(line)
                    _store_nmr_result(conf.nmr_shielding_info, instruction_s, "failed")
                    conf.part_info["part4"] = "refused"
                    store_confs.append(calculate.pop(calculate.index(conf)))
                else:
                    _store_nmr_result(
                        conf.nmr_shielding_info, instruction_s, "calculated"
                    )
            # save current data to jsonfile
            _part4_save(config, calculate, prev_calculated, store_confs, ensembledata)
        if prev_calculated:
            for conf in list(prev_calculated):
                conf.job["workdir"] = os.path.normpath(
                    os.path.join(config.cwd, "CONF" + str(conf.id), folder)
                )
                line = (
                    f"Shielding constant calculation {check[True]}"
                    f" for {last_folders(conf.job['workdir'], 2):>{pl}}: "
                )
                print(line)
                calculate.append(prev_calculated.pop(prev_calculated.index(conf)))

    # write anmr_enso output!
    print("\nGenerating file anmr_enso for processing with the ANMR program.")
    for conf in calculate:
        conf.calc_free_energy(e=energy, solv=gsolv, rrho=rrho)
    calculate = calc_boltzmannweights(calculate, "free_energy", config.temperature)
    try:
        # column width from the number of digits of the largest conformer id
        length = max(4, max(len(str(i.id)) for i in calculate))
        fmtenergy = max(len("{:.7f}".format(i.free_energy)) for i in calculate)
    except (ValueError, TypeError):
        length = 6
        fmtenergy = 10
    with open(os.path.join(config.cwd, "anmr_enso"), "w", newline=None) as out:
        out.write(
            f"{'ONOFF':5} {'NMR':^{length}} {'CONF':^{length}} {'BW':6} "
            f"{'Energy':{fmtenergy}} {'Gsolv':7} {'mRRHO':7} {'gi':7}\n"
        )
        for conf in calculate:
            # contributions which are not evaluated (gas phase / no mRRHO)
            # are written as 0.0
            gsolv_energy = getattr(conf, gsolv)["energy"] if gsolv is not None else 0.0
            rrho_energy = getattr(conf, rrho)["energy"] if rrho is not None else 0.0
            out.write(
                f"{1:<5} {conf.id:{length}} {conf.id:{length}} "
                f"{conf.bm_weight:.4f} {getattr(conf, energy)['energy']:.5f} "
                f"{gsolv_energy:.5f} "
                f"{rrho_energy:.5f} "
                f"{conf.gi:.3f}\n"
            )

    # write .anmrrc
    print("\nWriting .anmrrc!")
    element_ref_shield = write_anmrrc(config)

    print("\nGenerating plain nmrprop.dat files for each populated conformer.")
    print("These files contain all calculated shielding and coupling constants.")
    print("The files can be read by ANMR using the keyword '-plain'.\n")
    # write generic:
    if job is None:
        _part4_exit_no_job(config.prog4_s if config.shieldings else config.prog4_j)
    instructgeneric = {"jobtype": "genericout", "nat": int(config.nat)}
    calculate = run_in_parallel(
        config, q, resultq, job, config.maxthreads, calculate, instructgeneric, folder
    )

    # printout the averaged shielding constants
    average_shieldings(config, calculate, element_ref_shield, energy, gsolv, rrho)

    for conf in calculate:
        conf.reset_job_info()

    # end printout for part4
    print("\n\n")
    return config, calculate, store_confs, ensembledata
def _print_part5_settings(config, info):
    """Print the 'description: value' table for the settings listed in *info*."""
    optionsexchange = {True: "on", False: "off"}
    for item in info:
        if item[0] == "justprint":
            print(item[1:][0])
            continue
        if item[0] == "printoption":
            option = item[2]
        else:
            option = getattr(config, item[0])
        if option is True or option is False:
            option = optionsexchange[option]
        elif isinstance(option, list):
            option = [str(i) for i in option]
            if len(str(option)) > 40:
                # abbreviate long lists
                length = 0
                reduced = []
                for i in option:
                    length += len(i) + 2
                    if length < 40:
                        reduced.append(i)
                reduced.append("...")
                option = reduced
            option = ", ".join(option)
        print(
            "{}: {:{digits}} {}".format(
                item[1], "", option, digits=DIGILEN - len(item[1])
            )
        )
    print("")


def part5(config, conformers, store_confs, ensembledata):
    """
    Calculate optical rotation on the populated
    conformers (either directly from part2 OPTIMIZATION or after REFINEMENT
    (part3))

    If neither part3 nor part2 is active the prescreening (part1) free
    energies are used and the geometries are taken from the ensemble file,
    i.e. they are not DFT optimized (a warning is printed).

    Outline:
      1. print the part5 settings,
      2. keep conformers with energy (and, if requested, mRRHO / Gsolv)
         contributions at the selected level,
      3. calculate free energies and Boltzmann weights, print/write
         ``part5.dat`` and drop conformers beyond the Boltzmann threshold,
      4. create the ``OR`` folders, provide ``coord`` and run the optical
         rotation calculations,
      5. print the Boltzmann averaged specific rotation for each frequency in
         ``config.freq_or`` and, if the SD of Gsolv is available for all
         conformers, a standard deviation from 1000 samples with perturbed
         free energies,
      6. write the json file.

    Args:
        config: run configuration object.
        conformers: list of conformer objects; processed entries are removed
            from this list.
        store_confs: list collecting conformers that are sorted out
            (``None`` is treated as an empty list).
        ensembledata: ensemble object, only written to the json file here.

    Returns:
        tuple ``(config, calculate, store_confs, ensembledata)``.

    Exits with status 1 if no conformers are left.
    """
    save_errors = []
    print("\n" + "".ljust(PLENGTH, "-"))
    print("OPTICAL ROTATION MODE - PART5".center(PLENGTH, " "))
    print("".ljust(PLENGTH, "-") + "\n")
    # print flags for part5
    info = []
    info.append(["optical_rotation", "Part5"])
    info.append(["freq_or", "frequency in [nm]"])
    info.append(["func_or_scf", "functional for SCF"])
    info.append(["func_or", "functional for optical rotation"])
    info.append(["basis_or", "basis set for optical rotation"])
    if config.part3:
        info.append(["part3_threshold", "Boltzmann sum threshold employed"])
    elif config.part2 or config.part1:
        info.append(["part2_threshold", "Boltzmann sum threshold employed"])
    if config.solvent != "gas":
        info.append(["solvent", "solvent"])
        if config.prog == "tm":
            info.append(["printoption", "solvation model", "cosmo"])
    _print_part5_settings(config, info)
    # end print

    calculate = []  # has to be calculated in this run
    prev_calculated = []  # was already calculated in a previous run
    if store_confs is None:
        store_confs = []  # stores all confs which are sorted out!

    # setup queues
    q = Queue()
    resultq = Queue()

    # level of theory the Boltzmann weights are taken from
    if config.part3:
        # calc boltzmann weights from part3
        energy = "highlevel_sp_info"
        rrho = "highlevel_grrho_info"
        gsolv = "highlevel_gsolv_info"
        boltzmannthr = config.part3_threshold
        func, basis = config.func3, config.basis3
        smgsolv, gfnv = config.smgsolv3, config.part3_gfnv
    elif config.part2:
        # part3 is not calculated use boltzmann weights directly from part2
        energy = "lowlevel_sp_info"
        rrho = "lowlevel_grrho_info"
        gsolv = "lowlevel_gsolv_info"
        boltzmannthr = config.part2_threshold
        func, basis = config.func, config.basis
        smgsolv, gfnv = config.smgsolv2, config.part2_gfnv
    elif config.part1:
        # part2 is not calculated use boltzmann weights directly from part1
        # --> misappropriate config.part2_threshold
        # This means starting from not DFT optimized geometries!
        energy = "prescreening_sp_info"
        rrho = "prescreening_grrho_info"
        gsolv = "prescreening_gsolv_info"
        boltzmannthr = config.part2_threshold
        func, basis = config.func, config.basis
        smgsolv, gfnv = config.smgsolv1, config.part1_gfnv
    else:
        # without any of part1-3 there are no free energies to work with
        print("UNEXPECTED BEHAVIOUR")
        print("Going to exit!")
        sys.exit(1)

    unoptimized_warning = False
    # sort conformers:
    for conf in list(conformers):
        if conf.removed:
            store_confs.append(conformers.pop(conformers.index(conf)))
            print(f"CONF{conf.id} is removed as requested by the user!")
            continue
        if (
            conf.part_info["part2"] != "passed"
            and conf.optimization_info["info"] != "calculated"
        ):
            unoptimized_warning = True

        mol = conformers.pop(conformers.index(conf))
        if getattr(conf, energy)["info"] != "calculated":
            store_confs.append(mol)
        elif getattr(conf, rrho)["info"] != "calculated" and config.evaluate_rrho:
            store_confs.append(mol)
        elif getattr(conf, gsolv)["info"] != "calculated" and config.solvent != "gas":
            store_confs.append(mol)
        else:
            calculate.append(mol)

    if unoptimized_warning:
        print(
            "WARNING: Conformers have not been optimized at DFT level!!!\n"
            " Use results with care!\n"
        )

    if not calculate and not prev_calculated:
        print("ERROR: No conformers left!")
        print("Going to exit!")
        sys.exit(1)

    calculate.sort(key=lambda x: int(x.id))
    print("Considering the following conformers:")
    print_block(["CONF" + str(i.id) for i in calculate])

    # Calculate boltzmann weight for confs:
    # method names default to "" so the table can be built for gas phase
    # and/or without mRRHO as well
    rrho_method = ""
    solv_method = ""
    if not config.evaluate_rrho:
        rrho = None
    else:
        rrho_method, _ = config.get_method_name(
            "rrhoxtb",
            bhess=config.bhess,
            gfn_version=gfnv,
            sm=config.sm_rrho,
            solvent=config.solvent,
        )
    if config.solvent == "gas":
        gsolv = None
        energy_method, _ = config.get_method_name(
            "xtbopt",
            func=func,
            basis=basis,
            sm=smgsolv,
            gfn_version=gfnv,
            solvent=config.solvent,
        )
    else:
        if smgsolv in ("cosmors", "cosmors-fine"):
            tmp_name = "cosmors"
        elif smgsolv in ("alpb_gsolv", "gbsa_gsolv", "smd_gsolv"):
            tmp_name = smgsolv
        else:
            tmp_name = "sp_implicit"
        energy_method, solv_method = config.get_method_name(
            tmp_name,
            func=func,
            basis=basis,
            sm=smgsolv,
            gfn_version=gfnv,
            solvent=config.solvent,
        )

    for conf in calculate:
        conf.calc_free_energy(e=energy, solv=gsolv, rrho=rrho)
    calculate = calc_boltzmannweights(calculate, "free_energy", config.temperature)
    minfree = min([i.free_energy for i in calculate if i is not None])
    for conf in calculate:
        conf.rel_free_energy = (conf.free_energy - minfree) * AU2KCAL
    calculate.sort(key=lambda x: int(x.id))

    # printout for part5 -------------------------------------------------------
    print("\n" + "".ljust(int(PLENGTH / 2), "-"))
    print("* Gibbs free energies used in part5 *".center(int(PLENGTH / 2), " "))
    print("".ljust(int(PLENGTH / 2), "-") + "\n")
    columncall = [
        lambda conf: "CONF" + str(getattr(conf, "id")),
        lambda conf: getattr(conf, energy)["energy"],
        lambda conf: getattr(conf, gsolv)["energy"],
        lambda conf: getattr(conf, rrho)["energy"],
        lambda conf: getattr(conf, "free_energy"),
        lambda conf: getattr(conf, "rel_free_energy"),
        lambda conf: getattr(conf, "bm_weight") * 100,
    ]
    columnheader = [
        "CONF#",
        "E [Eh]",
        "Gsolv [Eh]",
        "GmRRHO [Eh]",
        "Gtot",
        "ΔGtot",
        "Boltzmannweight",
    ]
    columndescription = [
        "",
        energy_method,
        solv_method,
        rrho_method,
        "[Eh]",
        "[kcal/mol]",
        f" % at {config.temperature:.2f} K",
    ]
    columnformat = ["", (12, 7), (12, 7), (12, 7), (12, 7), (5, 2), (5, 2)]
    all_columns = (columncall, columnheader, columndescription, columnformat)
    if not config.evaluate_rrho:
        # ignore rrho in printout
        for column in all_columns:
            column.pop(3)
    if config.solvent == "gas":
        # ignore gsolv in printout
        for column in all_columns:
            column.pop(2)

    printout(
        os.path.join(config.cwd, "part5.dat"),
        columncall,
        columnheader,
        columndescription,
        columnformat,
        calculate,
        minfree,
    )
    calculate.sort(reverse=True, key=lambda x: float(x.bm_weight))
    sumup = 0.0
    for conf in list(calculate):
        sumup += conf.bm_weight
        if sumup >= boltzmannthr:
            if conf.bm_weight < (1 - boltzmannthr):
                store_confs.append(calculate.pop(calculate.index(conf)))
    print(f"\nConformers that are below the Boltzmann-thr of {boltzmannthr}:")
    print_block(["CONF" + str(i.id) for i in calculate])

    # create OR folder
    folder = "OR"
    save_errors, store_confs, calculate = new_folders(
        config.cwd, calculate, folder, save_errors, store_confs
    )
    if config.part3 or config.part2:
        # need to copy optimized coord to folder
        for conf in list(calculate):
            tmp1 = os.path.join(config.cwd, "CONF" + str(conf.id), config.func, "coord")
            # destination is anchored at config.cwd like the source and the folders
            tmp2 = os.path.join(config.cwd, "CONF" + str(conf.id), folder, "coord")
            try:
                shutil.copy(tmp1, tmp2)
            except FileNotFoundError:
                print("ERROR can't copy optimized geometry!")
                store_confs.append(calculate.pop(calculate.index(conf)))
    elif config.part1:
        # do not use coord from folder config.func it could be optimized if
        # part2 has ever been run, take coord from ensemble file
        # write coord to folder
        calculate, store_confs, save_errors = ensemble2coord(
            config, folder, calculate, store_confs, save_errors
        )

    if not calculate:
        print("ERROR: No conformers left!")
        print("Going to exit!")
        sys.exit(1)

    # check if OR calculated before!
    for conf in list(calculate):
        status = getattr(conf, "optical_rotation_info")["info"]
        if status == "calculated":
            prev_calculated.append(calculate.pop(calculate.index(conf)))
        elif status == "failed":
            store_confs.append(calculate.pop(calculate.index(conf)))

    instruction_or = {
        "jobtype": "opt-rot_sp",  # opt-rot only escf ; opt-rot_sp SP then escf
        "func": config.func_or_scf,
        "func2": config.func_or,
        "basis": getattr(
            config,
            "basis_or",
            config.func_basis_default.get(config.func, "def2-mTZVPP"),
        ),
        "charge": config.charge,
        "unpaired": config.unpaired,
        "solvent": "gas",
        "sm": "cosmo",
        "success": False,
        "omp": config.omp,
        "freq_or": config.freq_or,
    }
    job = None
    if config.prog == "orca":
        print("Can't calculate OR with ORCA! Use TM instead.")
        # ORCA can't calculate optical rotation!!! -->
        job = OrcaJob
        instruction_or["progpath"] = config.external_paths["orcapath"]
        if config.solvent != "gas":
            instruction_or["solvent"] = config.solvent
            instruction_or["sm"] = "cpcm"
    if config.prog == "tm":
        job = TmJob
        instruction_or["prepinfo"] = ["clear", "-grid", "2", "-scfconv", "6"]
        instruction_or["progpath"] = config.external_paths["escfpath"]
        if config.basis == config.basis_or:
            # same basis set: reuse the mos from the config.func folder and
            # skip the single-point
            instruction_or["copymos"] = config.func
            instruction_or["jobtype"] = "opt-rot"
        if config.solvent != "gas":
            instruction_or["solvent"] = config.solvent
            instruction_or["sm"] = "cosmo"

    instruction_or["method"], _ = config.get_method_name(
        instruction_or["jobtype"],
        func=instruction_or["func2"],
        basis=instruction_or["basis"],
        solvent=instruction_or["solvent"],
        prog=config.prog,
        func2=instruction_or["func"],
        sm=instruction_or["sm"],
    )

    print(f"\nOptical-rotation is calculated at {instruction_or['method']} level.\n")
    check = {True: "was successful", False: "FAILED"}
    pl = config.lenconfx + 4 + len(str("/" + folder))

    if calculate:
        if job is None:
            print(f"ERROR: No job class available in part5 for program '{config.prog}'!")
            print("Going to exit!")
            sys.exit(1)
        calculate = run_in_parallel(
            config,
            q,
            resultq,
            job,
            config.maxthreads,
            calculate,
            instruction_or,
            folder,
        )
        calculate.sort(key=lambda x: int(x.id))
        try:
            max_fmt = max(
                [
                    len(str(item.job["erange1"].get(config.freq_or[0])).split(".")[0])
                    for item in calculate
                ]
            )
            max_fmt += 9
        except Exception:
            max_fmt = 16
        for conf in list(calculate):
            line = (
                f"Optical-rotation calculation {check[conf.job['success']]}"
                f" for {last_folders(conf.job['workdir'], 2):>{pl}} at {config.freq_or[0]} nm: "
                f"{conf.job['erange1'].get(config.freq_or[0], 0.00):> {max_fmt}.7f}"
                f" populated to {conf.bm_weight*100:.2f} %"
            )
            print(line)
            if not conf.job["success"]:
                save_errors.append(line)
                conf.optical_rotation_info["info"] = "failed"
                conf.optical_rotation_info["method"] = instruction_or["method"]
                conf.part_info["part5"] = "refused"
                store_confs.append(calculate.pop(calculate.index(conf)))
            else:
                conf.optical_rotation_info["range"] = conf.job["erange1"]
                conf.optical_rotation_info["info"] = "calculated"
                conf.optical_rotation_info["method"] = instruction_or["method"]
                conf.part_info["part5"] = "passed"

    if prev_calculated:
        try:
            max_fmt = max(
                [
                    len(
                        str(
                            item.optical_rotation_info["range"].get(config.freq_or[0])
                        ).split(".")[0]
                    )
                    for item in prev_calculated
                ]
            )
            max_fmt += 9
        except Exception as e:
            print(e)
            max_fmt = 16
        prev_calculated.sort(key=lambda x: int(x.id))
        for conf in list(prev_calculated):
            conf.job["workdir"] = os.path.normpath(
                os.path.join(config.cwd, "CONF" + str(conf.id), folder)
            )
            print(
                f"Optical-rotation calculation {check[True]} for "
                f"{last_folders(conf.job['workdir'], 2):>{pl}} at {config.freq_or[0]} nm: "
                f"{conf.optical_rotation_info['range'].get(config.freq_or[0], 0.0000):> {max_fmt}.7f}"
                f" populated to {conf.bm_weight*100:.2f} % "
            )
            calculate.append(prev_calculated.pop(prev_calculated.index(conf)))

    # Boltzmann averaged specific rotation
    for freq in config.freq_or:
        averaged_or = 0.0
        for conf in calculate:
            averaged_or += conf.bm_weight * conf.optical_rotation_info["range"].get(
                freq, 0.0
            )
        print(
            f"\nAveraged specific rotation at {freq} nm : "
            f"{averaged_or: .3f} in deg*[dm(g/cc)]^(-1)"
        )

    # standard deviation of the average from perturbed free energies
    if all(
        [conf.lowlevel_gsolv_compare_info["std_dev"] is not None for conf in calculate]
    ):
        for freq in config.freq_or:
            all_or = []
            for _ in range(1000):
                averaged_or = 0.0
                for conf in calculate:
                    conf.calc_free_energy(e=energy, solv=gsolv, rrho=rrho)
                    conf.free_energy += normalvariate(
                        0.0, conf.lowlevel_gsolv_compare_info["std_dev"]
                    )
                calculate = calc_boltzmannweights(
                    calculate, "free_energy", config.temperature
                )
                for conf in calculate:
                    # missing frequency counts as 0.0, as in the average above
                    averaged_or += conf.bm_weight * conf.optical_rotation_info[
                        "range"
                    ].get(freq, 0.0)
                all_or.append(averaged_or)
            try:
                max_fmt = max(
                    [
                        len(
                            str(
                                item.optical_rotation_info["range"].get(
                                    config.freq_or[0]
                                )
                            ).split(".")[0]
                        )
                        for item in calculate
                    ]
                )
                max_fmt += 9
            except Exception as e:
                print(e)
                max_fmt = 16
            print(
                f" SD based on SD of Gsolv (part2) "
                f": {calc_std_dev(all_or):> {max_fmt}.3f} in deg*[dm(g/cc)]^(-1)"
            )
        # do not leave (and store) randomly perturbed energies/weights
        for conf in calculate:
            conf.calc_free_energy(e=energy, solv=gsolv, rrho=rrho)
        calculate = calc_boltzmannweights(calculate, "free_energy", config.temperature)

    for conf in calculate:
        conf.reset_job_info()

    # save current data to jsonfile
    config.write_json(
        config.cwd,
        [i.provide_runinfo() for i in calculate]
        + [i.provide_runinfo() for i in prev_calculated]
        + [i.provide_runinfo() for i in store_confs]
        + [ensembledata],
        config.provide_runinfo(),
    )

    if unoptimized_warning:
        # Repeat for user to see!
        print(
            "\nWARNING: Conformers have not been optimized at DFT level!!!\n"
            " Use results with care!\n"
        )
    if not calculate:
        print("ERROR: No conformers left!")
        print("Going to exit!")
        sys.exit(1)

    # end printout for part5
    tmp = int((PLENGTH - len("END of Part5")) / 2)
    print("\n" + "".ljust(tmp, ">") + "END of Part5" + "".rjust(tmp, "<"))
    return config, calculate, store_confs, ensembledata
+""" +from multiprocessing import JoinableQueue as Queue +import shutil +import time +import os +import sys +from copy import deepcopy +from .cfg import PLENGTH, CODING, AU2KCAL, DIGILEN +from .utilities import ( + check_for_folder, + print_block, + new_folders, + last_folders, + ensemble2coord, + frange, + calc_boltzmannweights, + spearman, + printout, + move_recursively, + write_trj, + crest_routine, + check_tasks, + print, + calc_weighted_std_dev, +) +from .orca_job import OrcaJob +from .tm_job import TmJob +from .parallel import run_in_parallel + + +def part2(config, conformers, store_confs, ensembledata): + """ + Optimization of the ensemble, at DFT level (possibly with implicit solvation) + Calculate low level free energies with COSMO-RS single-point and gsolv + contribution and GFNFF-bhess thermostatistical contribution on DFT optimized + geometries + Input: + - config [conifg_setup object] contains all settings + - conformers [list of molecule_data objects] each conformer is represented + Return: + -> config + -> conformers + """ + save_errors = [] + print("\n" + "".ljust(PLENGTH, "-")) + print("CRE OPTIMIZATION - PART2".center(PLENGTH, " ")) + print("".ljust(PLENGTH, "-") + "\n") + # print flags for part2 + info = [] + info.append(["prog", "program"]) + info.append(["func", "functional for part2"]) + info.append(["basis", "basis set for part2"]) + info.append(["ancopt", "using the xTB-optimizer for optimization"]) + if config.opt_spearman: + info.append(["opt_spearman", "using the new ensemble optimizer"]) + info.append( + ["opt_limit", "completely optimize all conformers below this threshold"] + ) + info.append(["printoption", "Spearman threshold", f"{config.spearmanthr:.3f}"]) + info.append(["optcycles", "number of optimization iterations"]) + if config.func == "r2scan-3c": + info.append(["radsize", "radsize"]) + if config.ancopt and config.optlevel2 is not None: + info.append(["optlevel2", "optimization level in part2"]) + if config.solvent != "gas": + 
info.append(["solvent", "solvent"]) + info.append(["sm2", "solvent model applied in the optimization"]) + if config.smgsolv2 not in (None, "sm"): + info.append(["smgsolv2", "solvent model for Gsolv contribution"]) + info.append(["temperature", "temperature"]) + if config.multitemp: + info.append(["multitemp", "evalulate at different temperatures"]) + info.append( + [ + "printoption", + "temperature range", + [ + i + for i in frange( + config.trange[0], config.trange[1], config.trange[2] + ) + ], + ] + ) + info.append(["part2_threshold", "Boltzmann sum threshold for sorting in part2"]) + info.append(["evaluate_rrho", "calculate mRRHO contribution"]) + if config.evaluate_rrho: + info.append(["prog_rrho", "program for mRRHO contribution"]) + if config.prog_rrho == "xtb": + info.append(["part2_gfnv", "GFN version for mRRHO and/or GBSA_Gsolv"]) + if config.bhess: + info.append( + [ + "bhess", + "Apply constraint to input geometry during mRRHO calculation", + ] + ) + optionsexchange = {True: "on", False: "off"} + for item in info: + if item[0] == "justprint": + print(item[1:][0]) + else: + if item[0] == "printoption": + option = item[2] + else: + option = getattr(config, item[0]) + if option is True or option is False: + option = optionsexchange[option] + elif isinstance(option, list): + option = [str(i) for i in option] + if len(str(option)) > 40: + length = 0 + reduced = [] + for i in option: + length += len(i) + 2 + if length < 40: + reduced.append(i) + reduced.append("...") + option = reduced + length = 0 + option = ", ".join(option) + print( + "{}: {:{digits}} {}".format( + item[1], "", option, digits=DIGILEN - len(item[1]) + ) + ) + print("") + # end print + + calculate = [] # has to be calculated in this run + prev_calculated = [] # was already calculated in a previous run + try: + store_confs + except NameError: + store_confs = [] # stores all confs which are sorted out! 
+ + if config.solvent == "gas": + print("Optimizing geometries at DFT level!") + else: + print("Optimizing geometries at DFT level with implicit solvation!") + + # setup queues + q = Queue() + resultq = Queue() + + if config.prog == "tm": + job = TmJob + elif config.prog == "orca": + job = OrcaJob + + for conf in list(conformers): + if conf.removed: + store_confs.append(conformers.pop(conformers.index(conf))) + print(f"CONF{conf.id} is removed as requested by the user!") + continue + if conf.id > config.nconf: + store_confs.append(conformers.pop(conformers.index(conf))) + continue + if conf.optimization_info["info"] == "not_calculated": + conf = conformers.pop(conformers.index(conf)) + calculate.append(conf) + elif conf.optimization_info["info"] == "failed": + conf = conformers.pop(conformers.index(conf)) + store_confs.append(conf) + print(f"Optimization of CONF{conf.id} failed in the previous run!") + elif conf.optimization_info["info"] == "prep-failed": + print( + f"Preparation for the optimization of CONF{conf.id} failed in the " + "previous run and is tried again!" + ) + conf = conformers.pop(conformers.index(conf)) + elif conf.optimization_info["info"] == "calculated": + conf = conformers.pop(conformers.index(conf)) + if conf.optimization_info["convergence"] == "converged": + conf.job["success"] = True + conf.job["ecyc"] = conf.optimization_info["ecyc"] + if conf.optimization_info.get("cregen_sort", "pass") == "pass": + prev_calculated.append(conf) + elif conf.optimization_info.get("cregen_sort", "pass") == "removed": + print( + f"CONF{conf.id} has been sorted out by CREGEN in a previous run." 
+ ) + store_confs.append(conf) + else: + # "not_converged" or "stopped_before_converged" + store_confs.append(conf) + if not calculate and not prev_calculated: + print("ERROR: No conformers left!") + print("Going to exit!") + sys.exit(1) + if prev_calculated: + check_for_folder(config.cwd, [i.id for i in prev_calculated], config.func) + print("The optimization was performed before for:") + print_block(["CONF" + str(i.id) for i in prev_calculated]) + pl = config.lenconfx + 4 + len(str("/" + config.func)) + + instruction_prep = { + "jobtype": "prep", + "func": config.func, + "basis": getattr(config, "basis", config.func_basis_default[config.func]), + "charge": config.charge, + "unpaired": config.unpaired, + "solvent": config.solvent, + "sm": config.sm2, + "optlevel": config.optlevel2, + "omp": config.omp, + "copymos": "", + "energy": 0.0, + "energy2": 0.0, + "success": False, + } + + # INSTRUCTION OPT !!!! + instruction_opt = { + "func": config.func, + "basis": getattr(config, "basis", config.func_basis_default[config.func]), + "charge": config.charge, + "unpaired": config.unpaired, + "solvent": config.solvent, + "fullopt": True, # output to opt-part2.out + "converged": False, + "hlow": config.hlow, + "sm": config.sm2, + "omp": config.omp, + "optcycles": config.optcycles, + "optlevel": config.optlevel2, + "multiTemp": False, + "energy": 0.0, + "energy2": 0.0, + "success": False, + } + + instruction_rrho_crude = { + "jobtype": "rrhoxtb", + "func": getattr(config, "part2_gfnv"), + "gfn_version": getattr(config, "part2_gfnv"), + "temperature": config.temperature, + "charge": config.charge, + "unpaired": config.unpaired, + "solvent": config.solvent, + "omp": config.omp, + "progpath": config.external_paths["xtbpath"], + "bhess": config.bhess, + "sm_rrho": config.sm_rrho, + "rmsdbias": config.rmsdbias, + "cwd": config.cwd, + "consider_sym": config.consider_sym, + "energy": 0.0, + "energy2": 0.0, + "success": False, + } + instruction_rrho_crude["method"], _ = 
config.get_method_name( + "rrhoxtb", + bhess=config.bhess, + gfn_version=instruction_rrho_crude["gfn_version"], + sm=instruction_rrho_crude["sm_rrho"], + solvent=instruction_rrho_crude["solvent"], + ) + + # Set optlevel and scfconv stuff --------------------------------------- + # r2scan-3c has additional settings in tm_job._prep_cefine! + if config.optlevel2 in ("crude", "sloppy", "loose"): + instruction_prep["prepinfo"] = ["low"] + instruction_opt["prepinfo"] = ["low"] + elif config.optlevel2 == "lax": + instruction_prep["prepinfo"] = ["low"] + instruction_opt["prepinfo"] = ["low"] + elif config.optlevel2 == "normal": + instruction_prep["prepinfo"] = ["low+"] + instruction_opt["prepinfo"] = ["low+"] + elif config.optlevel2 in ("tight", "vtight", "extreme"): + instruction_prep["prepinfo"] = ["high"] + instruction_opt["prepinfo"] = ["high"] + else: + instruction_prep["prepinfo"] = ["low+"] + instruction_opt["prepinfo"] = ["low+"] + # ----------------------------------------------------------------------- + if config.ancopt: + instruction_opt["jobtype"] = "xtbopt" + instruction_opt["xtb_driver_path"] = config.external_paths["xtbpath"] + else: + instruction_opt["jobtype"] = "opt" + + if config.func == "r2scan-3c": + instruction_prep["prepinfo"].extend(["-radsize", str(config.radsize)]) + + instruction_opt["method"], _ = config.get_method_name( + instruction_opt["jobtype"], + func=instruction_opt["func"], + basis=instruction_opt["basis"], + solvent=instruction_opt["solvent"], + sm=instruction_opt["sm"], + ) + + check = {True: "was successful", False: "FAILED"} + if calculate: + print("The optimization is calculated for:") + print_block(["CONF" + str(i.id) for i in calculate]) + # create folders: + save_errors, store_confs, calculate = new_folders( + config.cwd, calculate, config.func, save_errors, store_confs + ) + # write coord to folder + calculate, store_confs, save_errors = ensemble2coord( + config, config.func, calculate, store_confs, save_errors + ) + + # 
parallel prep execution + calculate = run_in_parallel( + config, + q, + resultq, + job, + config.maxthreads, + calculate, + instruction_prep, + config.func, + ) + # check if too many calculations failed + + for conf in list(calculate): + if instruction_prep["jobtype"] == "prep": + line = ( + f"Preparation in {last_folders(conf.job['workdir'], 2):>{pl}} " + f"{check[conf.job['success']]}." + ) + if not conf.job["success"]: + save_errors.append(line) + conf.optimization_info["info"] = "prep-failed" + store_confs.append(calculate.pop(calculate.index(conf))) + + # save current data to jsonfile + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + check_tasks(calculate, config.check) + else: + print("No conformers are considered additionally.") + # reset + for conf in calculate: + conf.reset_job_info() + # *************************************************************************** + # NEW ENSEMBLE OPTIMIZER: + if calculate: + print("Starting optimizations".center(70, "*")) + ### settings in instruction_opt are overwriting conf.job everytime,(while loop) + ### therefore dont write information which has to be reaccessed to it! 
+ + run = 1 + timings = [] # time per cycle + cycle_spearman = [] # spearmanthr used in evaluation per cycle + nconf_cycle = [] # number of conformers at end of each cycle + + do_increase = 0.6 + if config.opt_limit * do_increase >= 1.5: + ewin_increase = config.opt_limit * do_increase + print( + f"\nStarting threshold is set to {config.opt_limit} + " + f"{do_increase*100} % = {config.opt_limit + ewin_increase} kcal/mol\n" + ) + else: + ewin_increase = 1.5 + print( + f"\nStarting threshold is set to {config.opt_limit} + " + f"{ewin_increase} kcal/mol = {config.opt_limit + ewin_increase} kcal/mol\n" + ) + ewin_initial = config.opt_limit + ewin_increase + ewin = config.opt_limit + ewin_increase + + print(f"Lower limit is set to {config.opt_limit} kcal/mol\n") + lower_limit = config.opt_limit + maxecyc_prev = 1 + maxecyc = 0 + converged_run1 = [] + if config.nat > 200: + # stopcycle = don't optimize more than stopcycle cycles + stopcycle = config.nat * 2 + else: + stopcycle = 200 + if config.opt_spearman: + while calculate: + tic = time.perf_counter() + print(f"CYCLE {str(run)}".center(70, "*")) + # if len(calculate) == 1: + + # if stopcycle - maxecyc <= 0: + # limit = config.optcycles + # else: + # limit = stopcycle - maxecyc + # instruction_opt["optcycles"] = limit + + # calculate batch of optimizations + calculate = run_in_parallel( + config, + q, + resultq, + job, + config.maxthreads, + calculate, + instruction_opt, + config.func, + ) + + # check if optimization crashed + for conf in list(calculate): + if not conf.job["success"]: + print(f"removing CONF{conf.id} because optimization crashed.") + conf.optimization_info["info"] = "failed" + conf.optimization_info["convergence"] = "not_converged" + conf.optimization_info["method"] = instruction_opt["method"] + store_confs.append(calculate.pop(calculate.index(conf))) + elif conf.job["success"]: + if conf.job["converged"]: + # don't optimize further: + print( + f"Geometry optimization converged for: " + f"CONF{conf.id} 
within {conf.job['cycles']:>3} cycles" + ) + conf.optimization_info["info"] = "calculated" + conf.optimization_info["energy"] = conf.job["energy"] + conf.optimization_info["cycles"] = conf.job["cycles"] + conf.optimization_info["ecyc"] = conf.job["ecyc"] + conf.optimization_info["decyc"] = conf.job["decyc"] + conf.optimization_info["convergence"] = "converged" + conf.optimization_info["method"] = instruction_opt["method"] + if run == 1: + converged_run1.append(conf.id) + else: + prev_calculated.append( + calculate.pop(calculate.index(conf)) + ) + else: + conf.optimization_info["energy"] = conf.job["energy"] + # optimization cycles didn't result in convergence + # optimize further + if not calculate: + toc = time.perf_counter() + timings.append(toc - tic) + cycle_spearman.append("") + nconf_cycle.append(len(calculate) + len(prev_calculated)) + break + if run == 1 and calculate and config.evaluate_rrho: + # run GmRRHO on crudely optimized geometry + folder_rrho_crude = os.path.join(config.func, "rrho_crude") + # create folders: + save_errors, store_confs, calculate = new_folders( + config.cwd, + calculate, + folder_rrho_crude, + save_errors, + store_confs, + ) + # copy optimized geoms to folder + for conf in list(calculate): + try: + tmp_from = os.path.join( + config.cwd, "CONF" + str(conf.id), config.func + ) + tmp_to = os.path.join( + config.cwd, "CONF" + str(conf.id), folder_rrho_crude + ) + shutil.copy( + os.path.join(tmp_from, "coord"), + os.path.join(tmp_to, "coord"), + ) + except shutil.SameFileError: + pass + except FileNotFoundError: + if not os.path.isfile(os.path.join(tmp_from, "coord")): + print( + "ERROR: while copying the coord file from {}! 
" + "The corresponding file does not exist.".format( + tmp_from + ) + ) + elif not os.path.isdir(tmp_to): + print( + "ERROR: Could not create folder {}!".format(tmp_to) + ) + print("ERROR: Removing conformer {}!".format(conf.name)) + conf.lowlevel_grrho_info["info"] = "prep-failed" + store_confs.append(calculate.pop(calculate.index(conf))) + save_errors.append( + f"CONF{conf.id} was removed, because IO failed!" + ) + # parallel execution: + calculate = run_in_parallel( + config, + q, + resultq, + job, + config.maxthreads, + calculate, + instruction_rrho_crude, + folder_rrho_crude, + ) + check = {True: "was successful", False: "FAILED"} + # check if too many calculations failed + ### + for conf in list(calculate): + print( + f"The G_mRRHO calculation on crudely optimized DFT " + f"geometry @ {conf.job['symmetry']} " + f"{check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 3):>{pl}}: " + f"{conf.job['energy']:>.8f}" + ) + if not conf.job["success"]: + store_confs.append(calculate.pop(calculate.index(conf))) + else: + conf.optimization_info["energy_rrho"] = conf.job["energy"] + conf.optimization_info[ + "method_rrho" + ] = instruction_rrho_crude["method"] + conf.optimization_info["info_rrho"] = "calculated" + + for conf in list(calculate): + if conf.id in converged_run1: + prev_calculated.append(calculate.pop(calculate.index(conf))) + if not calculate: + toc = time.perf_counter() + timings.append(toc - tic) + cycle_spearman.append("") + nconf_cycle.append(len(calculate) + len(prev_calculated)) + break + if run >= 2 and config.crestcheck: + # do sorting with cregen! 
+ calculate, prev_calculated, store_confs = crest_routine( + config, + calculate, + config.func, + store_confs, + prev_calculated=prev_calculated, + ) + if not calculate: + toc = time.perf_counter() + timings.append(toc - tic) + cycle_spearman.append("") + nconf_cycle.append(len(calculate) + len(prev_calculated)) + break + maxecyc = max([len(conf.job["ecyc"]) for conf in calculate]) + print(f"Max number of performed iterations: {maxecyc}") + if len(calculate + prev_calculated) == 1: + # can't do spearman with only one conf + run_spearman = False + elif len(calculate) > 1 and run > 1: + run_spearman = True + else: + run_spearman = True + gesc = True # gESC with already good sorting + if run == 1 and gesc: + # only evaluate spearman starting from second cycle + print("Spearman rank evaluation is performed in the next cycle.") + cycle_spearman.append("") + run_spearman = False + + elif run == 1 and not gesc: + # only evaluate spearman starting from second cycle + print("Spearman rank evaluation is performed in the next cycle.") + cycle_spearman.append("") + run_spearman = False + run += 1 + toc = time.perf_counter() + timings.append(toc - tic) + nconf_cycle.append(len(calculate) + len(prev_calculated)) + print(f"CYCLE {run} performed in {toc -tic:0.4f} seconds") + continue + + # lists of equal lenght: + for conf in sorted( + calculate + prev_calculated, key=lambda x: int(x.id) + ): + if len(conf.job["ecyc"]) < maxecyc: + for _ in range(maxecyc - len(conf.job["ecyc"])): + conf.job["ecyc"].append(conf.job["ecyc"][-1]) + + # calculate min of each cycle: + minecyc = [] + if config.evaluate_rrho: + rrho_energy = "energy_rrho" + else: + rrho_energy = "axqzv" # to get 0.0 contribution + for i in range(maxecyc): + try: + minecyc.append( + min( + [ + conf.job["ecyc"][i] + + getattr(conf, "optimization_info").get( + rrho_energy, 0.0 + ) + for conf in calculate + prev_calculated + if conf.job["ecyc"][i] is not None + ] + ) + ) + except (ValueError) as e: + minecyc.append(0.0) + 
print(e) + # evalulate ΔE + for conf in sorted( + calculate + prev_calculated, key=lambda x: int(x.id) + ): + conf.job["decyc"] = [] + for i in range(maxecyc): + conf.job["decyc"].append( + ( + conf.job["ecyc"][i] + + getattr(conf, "optimization_info").get( + rrho_energy, 0.0 + ) + - minecyc[i] + ) + * AU2KCAL + ) + if run == 1: + print("") + for conf in sorted( + calculate + prev_calculated, key=lambda x: int(x.id) + ): + print( + f"CONF{conf.id :<{config.lenconfx}} initial ΔG = " + f"{conf.job['decyc'][0]:^5.2f} kcal/mol and " + f"current ΔG = {conf.job['decyc'][-1]:^5.2f} kcal/mol." + f" ({conf.optimization_info['convergence']})" + ) + previouscycle = maxecyc + print("") + else: + print("") + for conf in sorted( + calculate + prev_calculated, key=lambda x: int(x.id) + ): + print( + f"CONF{conf.id :<{config.lenconfx}} previous ΔG = " + f"{conf.job['decyc'][previouscycle-1]:^5.2f} kcal/mol and " + f"current ΔG = {conf.job['decyc'][-1]:^5.2f} kcal/mol." + f" ({conf.optimization_info['convergence']})" + ) + previouscycle = maxecyc + print("") + if run_spearman: + num_eval = 3 + try: + toevaluate = [] + for i in range(maxecyc_prev, maxecyc): + if i + num_eval <= maxecyc: + toevaluate.append(i) + _ = max(toevaluate) + digits1 = 4 + except ValueError: + # need to do another optimization cycle + run += 1 + toc = time.perf_counter() + timings.append(toc - tic) + cycle_spearman.append("") + nconf_cycle.append(len(calculate) + len(prev_calculated)) + print(f"CYCLE {run} performed in {toc -tic:0.4f} seconds") + continue + + evalspearman = [] + for i in toevaluate: + deprevious = [ + conf.job["decyc"][i - 1] + for conf in sorted( + calculate + prev_calculated, key=lambda x: int(x.id) + ) + ] + decurrent = [ + conf.job["decyc"][i - 1 + num_eval] + for conf in sorted( + calculate + prev_calculated, key=lambda x: int(x.id) + ) + ] + + spearman_v = spearman(deprevious, decurrent) + if i in toevaluate[-2:]: + print( + f"Evaluating Spearman coeff. 
from {i:>{digits1}} --> " + f"{i+num_eval:>{digits1}}" + f" = {spearman_v:>.4f}" + ) + evalspearman.append(spearman_v) + else: + print( + f"{'':>10} Spearman coeff. from {i:>{digits1}} --> " + f"{i+num_eval:>{digits1}}" + f" = {spearman_v:>.4f}" + ) + print( + f"Final averaged Spearman correlation coefficient: " + f"{(sum(evalspearman)/2):>.4f}" + ) + + if ( + len(evalspearman) >= 2 + and sum(evalspearman) / 2 >= config.spearmanthr + ): + print("\nPES is assumed to be parallel") + # adjust threshold Ewin: + if ewin > lower_limit: + if (ewin - (ewin_increase / 3)) < lower_limit: + ewin = lower_limit + else: + ewin += -(ewin_increase / 3) + print( + f"Updated optimization threshold to: {ewin:.2f} kcal/mol" + ) + else: + print( + f"Current optimization threshold: {ewin:.2f} kcal/mol" + ) + cycle_spearman.append(f"{sum(evalspearman)/2:.3f}") + + for conf in list(calculate): + if conf.job["decyc"][-1] > ewin and conf.job["grad_norm"] < 0.01: + print( + f"CONF{conf.id} is above {ewin} kcal/mol and gradient " + f"norm ({conf.job['grad_norm']}) is below {0.01}." + ) + if conf.job["decyc"][-1] < ewin_initial: + print( + f"CONF{conf.id} is removed because of the " + "lowered threshold!" + ) + # transfer energies and remove conf + conf.optimization_info["energy"] = conf.job["energy"] + conf.optimization_info["info"] = "calculated" + conf.optimization_info["cycles"] = conf.job["cycles"] + conf.optimization_info["ecyc"] = conf.job["ecyc"] + conf.optimization_info["decyc"] = conf.job["decyc"] + conf.optimization_info["method"] = instruction_opt["method"] + conf.optimization_info[ + "convergence" + ] = "stopped_before_converged" + print( + f"CONF{conf.id} is above threshold, dont optimize " + f"further and remove conformer." 
+ ) + store_confs.append(calculate.pop(calculate.index(conf))) + elif conf.job["decyc"][-1] > ewin and conf.job["grad_norm"] > 0.01: + print( + f"CONF{conf.id} is above {ewin} kcal/mol but " + f"gradient norm ({conf.job['grad_norm']}) is " + f"above {0.01} --> not sorted out!" + ) + toc = time.perf_counter() + timings.append(toc - tic) + nconf_cycle.append(len(calculate) + len(prev_calculated)) + print(f"\nCYCLE {run} performed in { toc - tic:0.4f} seconds") + # + if maxecyc >= stopcycle: + print("") + for conf in list(calculate): + # don't optimize further: + print( + f"!!! Geometry optimization STOPPED because of " + f"optcycle limit of {stopcycle} cycles reached for: " + f"CONF{conf.id} within {conf.job['cycles']:>3} cycles" + ) + conf.optimization_info["info"] = "calculated" + conf.optimization_info["energy"] = conf.job["energy"] + conf.optimization_info["cycles"] = conf.job["cycles"] + conf.optimization_info["ecyc"] = conf.job["ecyc"] + conf.optimization_info["decyc"] = conf.job["decyc"] + conf.optimization_info[ + "convergence" + ] = "converged" #### THIS IS NOT CORRECT! + conf.optimization_info["method"] = instruction_opt["method"] + prev_calculated.append(calculate.pop(calculate.index(conf))) + # + maxecyc_prev = maxecyc + run += 1 + # END while loop + else: + # use standard optimization! 
+ # update instruct_opt + tic = time.perf_counter() + del instruction_opt["optcycles"] + # calculate first round + calculate = run_in_parallel( + config, + q, + resultq, + job, + config.maxthreads, + calculate, + instruction_opt, + config.func, + ) + # check if optimization crashed + for conf in list(calculate): + if not conf.job["success"]: + print(f"removing CONF{conf.id} because optimization crashed.") + conf.optimization_info["info"] = "failed" + conf.optimization_info["convergence"] = "not_converged" + conf.optimization_info["method"] = instruction_opt["method"] + store_confs.append(calculate.pop(calculate.index(conf))) + elif conf.job["success"]: + if conf.job["converged"]: + # don't optimize further: + conf.optimization_info["info"] = "calculated" + conf.optimization_info["energy"] = conf.job["energy"] + conf.optimization_info["cycles"] = conf.job["cycles"] + conf.optimization_info["ecyc"] = conf.job["ecyc"] + conf.optimization_info["decyc"] = conf.job["decyc"] + conf.optimization_info["convergence"] = "converged" + conf.optimization_info["method"] = instruction_opt["method"] + # prev_calculated to keep it consistent with new ensemble optimizer + prev_calculated.append(calculate.pop(calculate.index(conf))) + else: + print(f"ERROR! 
CONF{conf.id} fell through sorting") + toc = time.perf_counter() + timings.append(toc - tic) + # ********************end standard optimization ********************* + print("Finished optimizations!".center(70, "*")) + if config.opt_spearman and ( + len(timings) == len(cycle_spearman) == len(nconf_cycle) + ): + try: + tl = max([len(f"{i: .2f}") for i in timings]) + if tl > 7: + tmp1 = tl + else: + tmp1 = 7 + print("Timings:") + print(f"Cycle: [s] {'#nconfs':^{tmp1}} Spearman coeff.") + for i in range(len(timings)): + print( + f"{i+1:>4} {timings[i]:> {tl}.2f} {nconf_cycle[i]:^{tmp1}} {cycle_spearman[i]}" + ) + print("sum: {:> .2f}".format(sum(timings))) + except Exception as e: + print(e) + else: + print("Timings:") + print("Cycle: [s]") + for i in timings: + print("{:4} {:>.2f}".format(timings.index(i) + 1, i)) + print("sum: {:>.2f}".format(sum(timings))) + print("\nCONVERGED optimizations for the following remaining conformers:") + prev_calculated.sort(key=lambda x: int(x.id)) + # end if calculate-- + for conf in list(prev_calculated): + print( + f"Converged optimization for {'CONF' + str(conf.id):{config.lenconfx+4}} " + f"after {conf.optimization_info['cycles'] :>3} cycles: " + f"{conf.optimization_info['energy']:>.7f}" + ) + calculate.append(prev_calculated.pop(prev_calculated.index(conf))) + + ensembledata.nconfs_per_part["part2_opt"] = len(calculate) + # save current data to jsonfile + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + if config.crestcheck: + calculate, prev_calculated, store_confs = crest_routine( + config, calculate, config.func, store_confs + ) + + # reset + for conf in calculate: + conf.reset_job_info() + if not calculate and not prev_calculated: + print("ERROR: No conformers left!") + print("Going to exit!") + sys.exit(1) + # 
******************************Optimization done**************************** + # Start Gsolv (COSMO-RS) calculation (or only gas phase single-point) + instruction_gsolv = { + "func": config.func, + "prepinfo": ["low+"], # TM m4 scfconv6 + "basis": getattr(config, "basis", config.func_basis_default[config.func]), + "charge": config.charge, + "unpaired": config.unpaired, + "solvent": config.solvent, + "sm": config.smgsolv2, + "omp": config.omp, + "temperature": config.temperature, + "energy": 0.0, + "energy2": 0.0, + "success": False, + "gfn_version": config.part2_gfnv, + } + if config.multitemp: + instruction_gsolv["trange"] = [ + i for i in frange(config.trange[0], config.trange[1], config.trange[2]) + ] + else: + instruction_gsolv["trange"] = [] + if config.solvent == "gas": + print("\nCalculating single-point energies!") + instruction_gsolv["jobtype"] = "sp" + # instruction_gsolv["prepinfo"] = ["low+"] + instruction_gsolv["method"], _ = config.get_method_name( + instruction_gsolv["jobtype"], + func=instruction_gsolv["func"], + basis=instruction_gsolv["basis"], + ) + folder = instruction_gsolv["func"] + name = "lowlevel single-point" + else: + print( + "\nCalculating single-point energies and solvation contribution (G_solv)!" 
+ ) + if config.smgsolv2 in config.smgsolv_2: + # additive Gsolv + # COSMO-RS + if "cosmors" in config.smgsolv2 and config.smgsolv2 != "dcosmors": + job = TmJob + instruction_gsolv["prepinfo"] = ["low+"] + exc_fine = {"cosmors": "normal", "cosmors-fine": "fine"} + tmp = { + "jobtype": "cosmors", + "cosmorssetup": config.external_paths["cosmorssetup"], + "cosmorsparam": exc_fine.get(config.smgsolv2, "normal"), + "cosmothermversion": config.external_paths["cosmothermversion"], + "copymos": str(instruction_gsolv["func"]), + } + instruction_gsolv.update(tmp) + instruction_gsolv["method"], instruction_gsolv[ + "method2" + ] = config.get_method_name( + "cosmors", + func=instruction_gsolv["func"], + basis=instruction_gsolv["basis"], + sm=instruction_gsolv["sm"], + ) + folder = str(instruction_gsolv["func"]) + "/COSMO" + name = "lowlevel COSMO-RS" + # GBSA-Gsolv / ALPB-Gsolv + elif config.smgsolv2 in ("gbsa_gsolv", "alpb_gsolv"): + instruction_gsolv["jobtype"] = instruction_gsolv["sm"] + if config.prog == "orca": + instruction_gsolv["progpath"] = config.external_paths["orcapath"] + instruction_gsolv["xtb_driver_path"] = config.external_paths["xtbpath"] + instruction_gsolv["method"], instruction_gsolv[ + "method2" + ] = config.get_method_name( + instruction_gsolv["jobtype"], + func=instruction_gsolv["func"], + basis=instruction_gsolv["basis"], + sm=instruction_gsolv["sm"], + gfn_version=instruction_gsolv["gfn_version"], + ) + if ( + conf.lowlevel_sp_info["info"] == "calculated" + and conf.lowlevel_sp_info["method"] == instruction_gsolv["method"] + ): + # do not calculate gas phase sp again! 
+ instruction_gsolv["energy"] = conf.lowlevel_sp_info["energy"] + instruction_gsolv["prepinfo"] = [] + # else: + # instruction_gsolv["prepinfo"] = ["low+"] + name = "lowlevel additive solvation" + folder = str(instruction_gsolv["func"]) + "/Gsolv2" + # SMD_Gsolv + elif config.smgsolv2 == "smd_gsolv": + job = OrcaJob + instruction_gsolv["jobtype"] = "smd_gsolv" + # instruction_gsolv["prepinfo"] = ["low+"] + instruction_gsolv["progpath"] = config.external_paths["orcapath"] + instruction_gsolv["method"], instruction_gsolv[ + "method2" + ] = config.get_method_name( + "smd_gsolv", + func=instruction_gsolv["func"], + basis=instruction_gsolv["basis"], + sm=instruction_gsolv["sm"], + ) + name = "lowlevel SMD_Gsolv" + folder = str(instruction_gsolv["func"]) + "/Gsolv2" + else: + # with implicit solvation + instruction_gsolv["jobtype"] = "sp_implicit" + # instruction_gsolv["prepinfo"] = ["low+"] + if config.prog == "orca": + instruction_gsolv["progpath"] = config.external_paths["orcapath"] + instruction_gsolv["method"], instruction_gsolv[ + "method2" + ] = config.get_method_name( + "sp_implicit", + func=instruction_gsolv["func"], + basis=instruction_gsolv["basis"], + sm=instruction_gsolv["sm"], + ) + name = "lowlevel single-point" + folder = instruction_gsolv["func"] + + for conf in list(calculate): + if conf.removed: + store_confs.append(calculate.pop(calculate.index(conf))) + print(f"CONF{conf.id} is removed as requested by the user!") + continue + if conf.lowlevel_sp_info["info"] == "failed": + conf = calculate.pop(calculate.index(conf)) + store_confs.append(conf) + print(f"Calculation of CONF{conf.id} failed in the previous run!") + elif conf.lowlevel_sp_info["info"] == "not_calculated": + # has to be calculated now + # take opt sp as lowlevel sp ! 
+ if conf.optimization_info["method"] == instruction_gsolv["method"]: + if conf.optimization_info.get("method", "not_found") == "calculated": + conf = calculate.pop(calculate.index(conf)) + conf.lowlevel_sp_info["info"] = "calculated" + conf.lowlevel_sp_info["method"] = instruction_gsolv["method"] + conf.lowlevel_sp_info["energy"] = conf.optimization_info["energy"] + conf.job["success"] = True + prev_calculated.append(conf) + continue + elif conf.lowlevel_sp_info["info"] == "prep-failed": + print( + f"Preparation step for CONF{conf.id} failed in the previous " + "run and is retried now!" + ) + # is retried now! + elif conf.lowlevel_sp_info["info"] == "calculated": + conf = calculate.pop(calculate.index(conf)) + if config.solvent != "gas": + # check if solvation calculation is calculated as well + if conf.lowlevel_gsolv_info["info"] == "failed": + store_confs.append(conf) + print( + f"Calculation of the solvation contribution for CONF" + f"{conf.id} failed in the previous run!" + ) + elif conf.lowlevel_gsolv_info["info"] == "not_calculated": + calculate.append(conf) + elif conf.lowlevel_gsolv_info["info"] == "calculated": + conf.job["success"] = True + prev_calculated.append(conf) + else: + print("UNEXPECTED BEHAVIOUR") + elif config.solvent == "gas": + conf.job["success"] = True + prev_calculated.append(conf) + else: + print("\nMISSING STUFF!\n") + + if not calculate and not prev_calculated: + print("ERROR: No conformers left!") + print("Going to exit!") + sys.exit(1) + if prev_calculated: + check_for_folder(config.cwd, [i.id for i in prev_calculated], folder) + if config.solvent == "gas": + print("The low level_single-point was calculated before for:") + else: + print("The low level gsolv calculation was calculated before for:") + print_block(["CONF" + str(i.id) for i in prev_calculated]) + pl = config.lenconfx + 4 + len(str("/" + folder)) + + check = {True: "was successful", False: "FAILED"} + if calculate: + if config.solvent == "gas": + print("The low 
level_single-point is now calculated for:") + if config.solvent != "gas" and config.smgsolv2 in config.smgsolv_2: + print("The low level gsolv calculation is now calculated for:") + # need to create folders + save_errors, store_confs, calculate = new_folders( + config.cwd, calculate, folder, save_errors, store_confs + ) + # need to copy optimized coord to COSMO/GSOLV2 folder + for conf in calculate: + tmp1 = os.path.join( + config.cwd, + "CONF" + str(conf.id), + instruction_gsolv["func"], + "coord", + ) + tmp2 = os.path.join("CONF" + str(conf.id), folder, "coord") + try: + shutil.copy(tmp1, tmp2) + except FileNotFoundError: + print("ERROR can't copy optimized geometry!") + print_block(["CONF" + str(i.id) for i in calculate]) + # parallel execution: + calculate = run_in_parallel( + config, + q, + resultq, + job, + config.maxthreads, + calculate, + instruction_gsolv, + folder, + ) + + for conf in list(calculate): + if instruction_gsolv["jobtype"] == "sp": + line = ( + f"{name} calculation {check[conf.job['success']]}" + f" for {last_folders(conf.job['workdir'], 2):>{pl}}: " + f"{conf.job['energy']:>.8f}" + ) + print(line) + if not conf.job["success"]: + save_errors.append(line) + conf.lowlevel_sp_info["method"] = instruction_gsolv["method"] + conf.lowlevel_sp_info["info"] = "failed" + store_confs.append(calculate.pop(calculate.index(conf))) + else: + conf.lowlevel_sp_info["energy"] = conf.job["energy"] + conf.lowlevel_sp_info["info"] = "calculated" + conf.lowlevel_sp_info["method"] = instruction_gsolv["method"] + elif instruction_gsolv["jobtype"] == "sp_implicit": + line = ( + f"{name} calculation {check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 2):>{pl}}: " + f"{conf.job['energy']:>.8f}" + ) + print(line) + if not conf.job["success"]: + save_errors.append(line) + conf.lowlevel_sp_info["info"] = "failed" + conf.lowlevel_sp_info["method"] = conf.job["method"] + store_confs.append(calculate.pop(calculate.index(conf))) + else: + 
conf.lowlevel_sp_info["energy"] = conf.job["energy"] + conf.lowlevel_sp_info["info"] = "calculated" + conf.lowlevel_sp_info["method"] = conf.job["method"] + elif instruction_gsolv["jobtype"] in ( + "cosmors", + "smd_gsolv", + "gbsa_gsolv", + "alpb_gsolv", + ): + line = ( + f"{name} calculation {check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 3):>{pl}}: " + f"{conf.job['energy2']:>.8f}" + ) + print(line) + if not conf.job["success"]: + save_errors.append(line) + conf.lowlevel_sp_info["info"] = "failed" + conf.lowlevel_sp_info["method"] = conf.job["method"] + conf.lowlevel_gsolv_info["info"] = "failed" + conf.lowlevel_gsolv_info["method"] = conf.job["method2"] + store_confs.append(calculate.pop(calculate.index(conf))) + else: + conf.lowlevel_sp_info["energy"] = conf.job["energy"] + conf.lowlevel_sp_info["info"] = "calculated" + conf.lowlevel_sp_info["method"] = instruction_gsolv["method"] + conf.lowlevel_gsolv_info["energy"] = conf.job["energy2"] + conf.lowlevel_gsolv_info["gas-energy"] = conf.job["energy"] + conf.lowlevel_gsolv_info["info"] = "calculated" + conf.lowlevel_gsolv_info["method"] = instruction_gsolv["method2"] + conf.lowlevel_gsolv_info["range"] = conf.job["erange1"] + else: + print( + f'UNEXPECTED BEHAVIOUR: {conf.job["success"]} {conf.job["jobtype"]}' + ) + # save current data to jsonfile + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + check_tasks(calculate, config.check) + else: + print("No conformers are considered additionally.") + # adding conformers calculated before: + if prev_calculated: + for conf in list(prev_calculated): + conf.job["workdir"] = os.path.normpath( + os.path.join(config.cwd, "CONF" + str(conf.id), folder) + ) + if instruction_gsolv["jobtype"] in ("sp", "sp_implicit"): + print( + f"{name} calculation 
{check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 2):>{pl}}: " + f"{conf.lowlevel_sp_info['energy']:>.7f}" + ) + elif instruction_gsolv["jobtype"] in ( + "cosmors", + "smd_gsolv", + "gbsa_gsolv", + "alpb_gsolv", + ): + print( + f"{name} calculation {check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 3):>{pl}}: " + f"{conf.lowlevel_gsolv_info['energy']:>.7f}" + ) + calculate.append(prev_calculated.pop(prev_calculated.index(conf))) + # reset + for conf in calculate: + conf.reset_job_info() + if not calculate: + print("ERROR: No conformers left!") + print("Going to exit!") + sys.exit(1) + + # *************************************************************************** + # Starting grrho calculation on DFT geometry (bhess) + if config.evaluate_rrho: + if config.solvent == "gas": + print("\nCalculating lowlevel G_mRRHO on DFT geometry!") + else: + print( + "\nCalculating lowlevel G_mRRHO with implicit solvation " + "on DFT geometry!" + ) + for conf in list(calculate): + if conf.lowlevel_grrho_info["info"] == "not_calculated": + pass + if conf.lowlevel_grrho_info["info"] == "prep-failed": + # try again + pass + elif conf.lowlevel_grrho_info["info"] == "failed": + conf = calculate.pop(calculate.index(conf)) + conf.__class__ = job + store_confs.append(conf) + print(f"Calculation of CONF{conf.id} failed in the previous run!") + elif conf.lowlevel_grrho_info["info"] == "calculated": + conf = calculate.pop(calculate.index(conf)) + conf.__class__ = job + conf.job["success"] = True + prev_calculated.append(conf) + + if not calculate and not prev_calculated: + print("ERROR: No conformers left!") + print("Going to exit!") + sys.exit(1) + folderrho = "rrho_part2" + if prev_calculated: + check_for_folder(config.cwd, [i.id for i in prev_calculated], folderrho) + print("The G_mRRHO calculation was performed before for:") + print_block(["CONF" + str(i.id) for i in prev_calculated]) + pl = config.lenconfx + 4 + len(str("/" + folderrho)) + 
instruction_rrho = { + "jobtype": "rrhoxtb", + "func": getattr(config, "part2_gfnv"), + "gfn_version": getattr(config, "part2_gfnv"), + "temperature": config.temperature, + "charge": config.charge, + "unpaired": config.unpaired, + "solvent": config.solvent, + "omp": config.omp, + "progpath": config.external_paths["xtbpath"], + "bhess": config.bhess, + "sm_rrho": config.sm_rrho, + "rmsdbias": config.rmsdbias, + "cwd": config.cwd, + "consider_sym": config.consider_sym, + "energy": 0.0, + "energy2": 0.0, + "success": False, + } + instruction_rrho["method"], _ = config.get_method_name( + "rrhoxtb", + bhess=config.bhess, + gfn_version=instruction_rrho["gfn_version"], + sm=instruction_rrho["sm_rrho"], + solvent=instruction_rrho["solvent"], + ) + if config.multitemp: + instruction_rrho["trange"] = [ + i for i in frange(config.trange[0], config.trange[1], config.trange[2]) + ] + else: + instruction_rrho["trange"] = [] + + if calculate: + print("The lowlevel G_mRRHO calculation is now performed for:") + print_block(["CONF" + str(i.id) for i in calculate]) + # create folders: + save_errors, store_confs, calculate = new_folders( + config.cwd, calculate, folderrho, save_errors, store_confs + ) + # copy optimized geoms to folder + for conf in list(calculate): + try: + tmp_from = os.path.join( + config.cwd, "CONF" + str(conf.id), config.func + ) + tmp_to = os.path.join(config.cwd, "CONF" + str(conf.id), folderrho) + shutil.copy( + os.path.join(tmp_from, "coord"), os.path.join(tmp_to, "coord") + ) + except shutil.SameFileError: + pass + except FileNotFoundError: + if not os.path.isfile(os.path.join(tmp_from, "coord")): + print( + "ERROR: while copying the coord file from {}! 
" + "The corresponding file does not exist.".format(tmp_from) + ) + elif not os.path.isdir(tmp_to): + print("ERROR: Could not create folder {}!".format(tmp_to)) + print("ERROR: Removing conformer {}!".format(conf.name)) + conf.lowlevel_grrho_info["info"] = "prep-failed" + store_confs.append(calculate.pop(calculate.index(conf))) + save_errors.append(f"CONF{conf.id} was removed, because IO failed!") + # parallel execution: + calculate = run_in_parallel( + config, + q, + resultq, + job, + config.maxthreads, + calculate, + instruction_rrho, + folderrho, + ) + check = {True: "was successful", False: "FAILED"} + # check if too many calculations failed + + ### + for conf in list(calculate): + print( + f"The lowlevel G_mRRHO calculation @ {conf.job['symmetry']} " + f"{check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 2):>{pl}}: " + f"{conf.job['energy']:>.8f}" + ) + if not conf.job["success"]: + conf.lowlevel_grrho_info["info"] = "failed" + store_confs.append(calculate.pop(calculate.index(conf))) + else: + conf.sym = conf.job["symmetry"] + conf.lowlevel_grrho_info["rmsd"] = conf.job["rmsd"] + conf.lowlevel_grrho_info["energy"] = conf.job["energy"] + conf.lowlevel_grrho_info["info"] = "calculated" + conf.lowlevel_grrho_info["method"] = instruction_rrho["method"] + conf.lowlevel_grrho_info["range"] = conf.job["erange1"] + conf.lowlevel_hrrho_info["range"] = conf.job["erange2"] + conf.lowlevel_hrrho_info["info"] = "calculated" + conf.lowlevel_hrrho_info["method"] = instruction_rrho["method"] + # save current data to jsonfile + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + check_tasks(calculate, config.check) + else: + print("No conformers are considered additionally.") + # adding conformers calculated before: + if prev_calculated: + for conf in list(prev_calculated): + 
conf.job["workdir"] = os.path.normpath( + os.path.join(config.cwd, "CONF" + str(conf.id), folderrho) + ) + print( + f"The lowlevel G_mRRHO calculation @ {conf.sym} " + f"{check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 2):>{pl}}: " + f"{conf.lowlevel_grrho_info['energy']:>.8f}" + ) + calculate.append(prev_calculated.pop(prev_calculated.index(conf))) + if not calculate: + print("ERROR: No conformers left!") + print("Going to exit!") + sys.exit(1) + + # printout for part2 ------------------------------------------------------- + print("\n" + "".ljust(int(PLENGTH / 2), "-")) + print("* Gibbs free energies of part2 *".center(int(PLENGTH / 2), " ")) + print("".ljust(int(PLENGTH / 2), "-") + "\n") + columncall = [ + lambda conf: "CONF" + str(getattr(conf, "id")), + lambda conf: getattr(conf, "xtb_energy"), + lambda conf: getattr(conf, "rel_xtb_energy"), + lambda conf: getattr(conf, "lowlevel_sp_info")["energy"], + lambda conf: getattr(conf, "lowlevel_gsolv_info")["energy"], + lambda conf: getattr(conf, "lowlevel_grrho_info")["energy"], + lambda conf: getattr(conf, "free_energy"), + lambda conf: getattr(conf, "rel_free_energy"), + lambda conf: getattr(conf, "bm_weight") * 100, + ] + columnheader = [ + "CONF#", + "E(GFNn-xTB)", + "ΔE(GFNn-xTB)", + "E [Eh]", + "Gsolv [Eh]", + "GmRRHO [Eh]", + "Gtot", + "ΔGtot", + "Boltzmannweight", + ] + columndescription = [ + "", + "[a.u.]", + "[kcal/mol]", + "", + "", # Gsolv + "", + "[Eh]", + "[kcal/mol]", + f" % at {config.temperature:.2f} K", + ] + columndescription2 = ["", "", "", "", "", "", "", "", ""] + columnformat = [ + "", + (12, 7), + (5, 2), + (12, 7), + (12, 7), + (12, 7), + (12, 7), + (5, 2), + (5, 2), + ] + if config.solvent == "gas": + # Energy + columndescription[3] = instruction_gsolv["method"] + elif config.solvent != "gas": + # Energy + columndescription[3] = instruction_gsolv["method"] + # Gsolv + columndescription[4] = instruction_gsolv["method2"] + if config.evaluate_rrho: + # Grrho + 
columndescription[5] = instruction_rrho["method"] + if not config.evaluate_rrho or config.solvent == "gas": + if not config.evaluate_rrho: + # ignore rrho in printout + columncall.pop(5) + columnheader.pop(5) + columndescription.pop(5) + columnformat.pop(5) + if config.solvent == "gas": + # ignore Gsolv + columncall.pop(4) + columnheader.pop(4) + columndescription.pop(4) + columnformat.pop(4) + + for conf in calculate: + if not config.evaluate_rrho: + rrho = None + else: + rrho = "lowlevel_grrho_info" + if config.solvent == "gas": + solv = None + else: + solv = "lowlevel_gsolv_info" + e = "lowlevel_sp_info" + conf.calc_free_energy(e=e, solv=solv, rrho=rrho) + + calculate = calc_boltzmannweights(calculate, "free_energy", config.temperature) + try: + minfree = min([i.free_energy for i in calculate if i is not None]) + except ValueError: + raise ValueError + for conf in calculate: + conf.rel_free_energy = (conf.free_energy - minfree) * AU2KCAL + if conf.free_energy == minfree: + lowestconf = conf.id + ensembledata.bestconf["part2"] = conf.id + + calculate.sort(key=lambda x: int(x.id)) + printout( + os.path.join(config.cwd, "part2.dat"), + columncall, + columnheader, + columndescription, + columnformat, + calculate, + minfree, + columndescription2=columndescription2, + ) + # printout for part2 ------------------------------------------------------- + + # *************************************************************************** + # SD on solvation: + # DCOSMO-RS_GSOLV + instruction_gsolv_compare = { + "func": config.func, + "basis": getattr(config, "basis", config.func_basis_default[config.func]), + "charge": config.charge, + "unpaired": config.unpaired, + "solvent": config.solvent, + "sm": "alpb_gsolv", + "omp": config.omp, + "temperature": config.temperature, + "energy": 0.0, + "energy2": 0.0, + "success": False, + "gfn_version": config.part2_gfnv, + } + instruction_gsolv_compare["trange"] = [] + instruction_gsolv_compare["prepinfo"] = [] + 
instruction_gsolv_compare["xtb_driver_path"] = config.external_paths["xtbpath"] + instruction_gsolv_compare["jobtype"] = instruction_gsolv_compare["sm"] + _, instruction_gsolv_compare["method"] = config.get_method_name( + instruction_gsolv_compare["jobtype"], + func=instruction_gsolv_compare["func"], + basis=instruction_gsolv_compare["basis"], + sm=instruction_gsolv_compare["sm"], + gfn_version=instruction_gsolv_compare["gfn_version"], + ) + folder_compare = "alpb_gsolv" + name = "alpb_gsolv".upper() + pl = config.lenconfx + 4 + len(str("/" + folder_compare)) + if ( + config.solvent != "gas" + and config.sm2 == "dcosmors" + and config.smgsolv2 in ("cosmors", "cosmors-fine") + ): + dorun = True + while dorun: + print( + "\nCalculating ALPB_Gsolv values for evaluation of the std. dev. of Gsolv." + ) + for conf in calculate: + if conf.id == ensembledata.bestconf["part2"]: + gsolv_min = conf.lowlevel_gsolv_info["energy"] + gsolv_min_id = conf.id + try: + dcosmors_gsolv_min = ( + conf.optimization_info["energy"] + - conf.lowlevel_gsolv_info["gas-energy"] + ) + except (TypeError, KeyError): + print( + "ERROR: Can't calculate DCOSMO-RS_gsolv. Skipping SD of Gsolv!" 
+ ) + dorun = False + break + + for conf in list(calculate): + if conf.lowlevel_gsolv_compare_info["info"] == "not_calculated": + pass + if conf.lowlevel_gsolv_compare_info["info"] == "prep-failed": + # try again + pass + elif conf.lowlevel_gsolv_compare_info["info"] == "failed": + # dont remove conformer this is only to calculate SD + conf = calculate.pop(calculate.index(conf)) + conf.job["success"] = True + conf.lowlevel_gsolv_compare_info["energy"] = 0.0 + prev_calculated.append(conf) + print(f"Calculation of CONF{conf.id} failed in the previous run!") + elif conf.lowlevel_gsolv_compare_info["info"] == "calculated": + conf = calculate.pop(calculate.index(conf)) + conf.job["success"] = True + prev_calculated.append(conf) + # need to create folders + save_errors, store_confs, calculate = new_folders( + config.cwd, calculate, folder_compare, save_errors, store_confs + ) + # need to copy optimized coord to COSMO/GSOLV2 folder + for conf in calculate: + tmp1 = os.path.join( + config.cwd, + "CONF" + str(conf.id), + instruction_gsolv_compare["func"], + "coord", + ) + tmp2 = os.path.join("CONF" + str(conf.id), folder_compare, "coord") + try: + shutil.copy(tmp1, tmp2) + except FileNotFoundError: + print("ERROR can't copy optimized geometry!") + + if calculate: + calculate = run_in_parallel( + config, + q, + resultq, + job, + config.maxthreads, + calculate, + instruction_gsolv_compare, + folder_compare, + ) + for conf in calculate: + line = ( + f"{name} calculation {check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 2):>{pl}}: " + f"{conf.job['energy2']:>.8f}" + ) + print(line) + if not conf.job["success"]: + save_errors.append(line) + conf.lowlevel_gsolv_compare_info["info"] = "failed" + conf.lowlevel_gsolv_compare_info["method"] = conf.job["method"] + # store_confs.append(calculate.pop(calculate.index(conf))) + print("ERROR") + else: + conf.lowlevel_gsolv_compare_info["energy"] = conf.job["energy2"] + conf.lowlevel_gsolv_compare_info["info"] = 
"calculated" + conf.lowlevel_gsolv_compare_info["method"] = instruction_gsolv[ + "method" + ] + if prev_calculated: + for conf in list(prev_calculated): + conf.job["workdir"] = os.path.normpath( + os.path.join(config.cwd, "CONF" + str(conf.id), folder_compare) + ) + line = ( + f"{name} calculation {check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 2):>{pl}}: " + f"{conf.lowlevel_gsolv_compare_info['energy']:>.8f}" + ) + print(line) + calculate.append(prev_calculated.pop(prev_calculated.index(conf))) + for conf in calculate: + if conf.id == gsolv_min_id: + alpb_gsolv_min = conf.lowlevel_gsolv_compare_info["energy"] + + print("\nSD of solvation models (all units in kcal/mol):") + print( + f"CONFX ΔG(COSMO-RS) ΔG(DCOSMO-RS_gsolv) ΔG(ALPB_gsolv) " + f"SD(COSMO-RS 40%, DCOSMO-RS_gsolv 40%, ALPB_gsolv 20%)" + ) + print("".ljust(PLENGTH, "-")) + pl = max([len(str(conf.id)) for conf in calculate]) + for conf in calculate: + dgsolv = -gsolv_min + conf.lowlevel_gsolv_info["energy"] + dgdcosmors = -dcosmors_gsolv_min + ( + conf.optimization_info["energy"] + - conf.lowlevel_gsolv_info["gas-energy"] + ) + dgalpb = -alpb_gsolv_min + conf.lowlevel_gsolv_compare_info["energy"] + conf.lowlevel_gsolv_compare_info["std_dev"] = calc_weighted_std_dev( + [dgsolv, dgdcosmors, dgalpb], weights=[0.4, 0.4, 0.2] + ) + print( + f"CONF{conf.id:<{pl}} {dgsolv*AU2KCAL:^ 12.2f} " + f"{dgdcosmors*AU2KCAL:^ 19.2f} {dgalpb*AU2KCAL:^ 14.2f}" + f" {conf.lowlevel_gsolv_compare_info['std_dev']*AU2KCAL:^ 41.2f}" + ) + print("".ljust(PLENGTH, "-")) + dorun = False + break + # END SD Gsolv + + # calculate average G correction + print("\nCalculating Boltzmann averaged free energy of ensemble!\n") + avGcorrection = { + "avGcorrection": {}, + "avG": {}, + "avE": {}, + "avGsolv": {}, + "avGRRHO": {}, + } + if config.multitemp: + trange = [ + i for i in frange(config.trange[0], config.trange[1], config.trange[2]) + ] + else: + trange = [config.temperature] + # calculate Boltzmannweights + 
if not config.evaluate_rrho or config.solvent == "gas": + if not config.evaluate_rrho and config.solvent == "gas": + line = ( + f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} " + f"{'avG(T) /a.u.':>14} " + # f"{'avGcorrection(T) /a.u.':>22}" + ) + elif not config.evaluate_rrho: + line = ( + f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} " + f"{'avGsolv(T) /a.u.':>16} {'avG(T) /a.u.':>14} " + # f"{'avGcorrection(T) /a.u.':>22}" + ) + elif config.solvent == "gas": + line = ( + f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} " + f"{'avGmRRHO(T) /a.u.':>16} {'avG(T) /a.u.':>14} " + # f"{'avGcorrection(T) /a.u.':>22}" + ) + else: + line = ( + f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} " + f"{'avGmRRHO(T) /a.u.':>16} {'avGsolv(T) /a.u.':>16} " + f"{'avG(T) /a.u.':>14}" + # f" {'avGcorrection(T) /a.u.':>22}" + ) + print(line) + print("".ljust(int(PLENGTH), "-")) + for temperature in trange: + # get free energy at (T) + for conf in calculate: + if not config.evaluate_rrho: + rrho = None + else: + rrho = "lowlevel_grrho_info" + if config.solvent == "gas": + solv = None + else: + solv = "lowlevel_gsolv_info" + e = "lowlevel_sp_info" + conf.calc_free_energy(e=e, solv=solv, rrho=rrho, t=temperature) + try: + minfreeT = min( + [conf.free_energy for conf in calculate if conf.free_energy is not None] + ) + except ValueError: + raise ValueError + + calculate = calc_boltzmannweights(calculate, "free_energy", temperature) + avG = 0.0 + avE = 0.0 + avGRRHO = 0.0 + # avHRRHO but with new boltzmann weights? 
+ avGsolv = 0.0 + for conf in calculate: + avG += conf.bm_weight * conf.free_energy + avE += conf.bm_weight * conf.lowlevel_sp_info["energy"] + avGRRHO += conf.bm_weight * conf.lowlevel_grrho_info["range"].get( + temperature, 0.0 + ) + avGsolv += conf.bm_weight * conf.lowlevel_gsolv_info["range"].get( + temperature, 0.0 + ) + + avGcorrection["avG"][temperature] = avG + avGcorrection["avE"][temperature] = avE + avGcorrection["avGRRHO"][temperature] = avGRRHO + avGcorrection["avGsolv"][temperature] = avGsolv + for conf in calculate: + if conf.free_energy == minfreeT: + avGcorrection["avGcorrection"][temperature] = avG - conf.free_energy + # printout: + if not config.evaluate_rrho or config.solvent == "gas": + if not config.evaluate_rrho and config.solvent == "gas": + line = ( + f"{temperature:^15} {avE:>14.7f} {avG:>14.7f} " + # f"{avGcorrection['avGcorrection'][temperature]:>22.7f}" + ) + elif not config.evaluate_rrho: + line = ( + f"{temperature:^15} {avE:>14.7f} {avGsolv:>16.7f} " + f"{avG:>14.7f} " + # f"{ avGcorrection['avGcorrection'][temperature]:>22.7f}" + ) + elif config.solvent == "gas": + line = ( + f"{temperature:^15} {avE:>14.7f} {avGRRHO:>16.7f} " + f"{avG:>14.7f} " + # f"{ avGcorrection['avGcorrection'][temperature]:>22.7f}" + ) + else: + line = ( + f"{temperature:^15} {avE:>14.7f} {avGRRHO:>16.7f} " + f"{avGsolv:>16.7f} {avG:>14.7f} " + # f"{ avGcorrection['avGcorrection'][temperature]:>22.7f}" + ) + if temperature == config.temperature: + print(line, " <<====") + else: + print(line) + print("".ljust(int(PLENGTH), "-")) + print("") + + # reset boltzmannweights to correct temperature + # get free energy at (T) + for conf in calculate: + if not config.evaluate_rrho: + rrho = None + else: + rrho = "lowlevel_grrho_info" + if config.solvent == "gas": + solv = None + else: + solv = "lowlevel_gsolv_info" + e = "lowlevel_sp_info" + conf.calc_free_energy(e=e, solv=solv, rrho=rrho) + # ensembledata is used to store avGcorrection + ensembledata.comment = [ + 
lowestconf, + "storage for avGcorrection of ensemble", + f"corresponding to CONF{lowestconf}", + ] + ensembledata.avGcorrection = avGcorrection + + if not calculate: + print("ERROR: No conformers left!") + print("Going to exit!") + sys.exit(1) + + calculate = calc_boltzmannweights(calculate, "free_energy", config.temperature) + + try: + minfree = min( + [conf.free_energy for conf in calculate if conf.free_energy is not None] + ) + except ValueError: + raise ValueError + for conf in calculate: + conf.rel_free_energy = (conf.free_energy - minfree) * AU2KCAL + + for conf in calculate: + if conf.free_energy == minfree: + ensembledata.bestconf["part2"] = conf.id + + # + print("") + onlyprintout = deepcopy(calculate) + onlyprintout.sort(reverse=True, key=lambda x: float(x.bm_weight)) + for i in (100, 95, 90, 80, 70): + sumup = 0.0 + for conf in list(onlyprintout): + sumup += conf.bm_weight + if sumup > (i / 100): + if conf.bm_weight <= (1 - (i / 100)): + onlyprintout.pop(onlyprintout.index(conf)) + + # write ensemble + outfile = f"enso_ensemble_part2_p_{i}.xyz" + # move_recursively(config.cwd, outfile) + kwargs = {"energy": "xtb_energy", "rrho": "lowlevel_grrho_info"} + write_trj( + sorted(onlyprintout, key=lambda x: float(x.free_energy)), + config.cwd, + outfile, + config.func, + config.nat, + "free_energy", + overwrite=True, + **kwargs, + ) + + # SORTING for the next part: + print("\n" + "".ljust(int(PLENGTH / 2), "-")) + print("Conformers considered further".center(int(PLENGTH / 2), " ")) + print("".ljust(int(PLENGTH / 2), "-") + "\n") + # evaluate conformer consideration based on Boltzmann-population + calculate.sort(reverse=True, key=lambda x: float(x.bm_weight)) + sumup = 0.0 + for conf in list(calculate): + sumup += conf.bm_weight + if sumup >= (config.part2_threshold / 100): + if conf.bm_weight < (1 - (config.part2_threshold / 100)): + mol = calculate.pop(calculate.index(conf)) + mol.part_info["part2"] = "refused" + store_confs.append(mol) + else: + 
conf.part_info["part2"] = "passed" + else: + conf.part_info["part2"] = "passed" + + ensembledata.nconfs_per_part["part2"] = len(calculate) + if calculate: + print( + f"\nConformers that are below the Boltzmann-thr of {config.part2_threshold}%:" + ) + print_block(["CONF" + str(i.id) for i in calculate]) + else: + print("ERROR: No conformers left!") + print("Going to exit!") + sys.exit(1) + + # save current data to jsonfile + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + # write ensemble + move_recursively(config.cwd, "enso_ensemble_part2.xyz") + if config.evaluate_rrho: + kwargs = {"energy": "xtb_energy_unbiased", "rrho": "lowlevel_grrho_info"} + else: + kwargs = {"energy": "xtb_energy_unbiased"} + write_trj( + sorted(calculate, key=lambda x: float(x.free_energy)), + config.cwd, + "enso_ensemble_part2.xyz", + config.func, + config.nat, + "free_energy", + **kwargs, + ) + + # write coord.enso_best + for conf in calculate: + if conf.id == ensembledata.bestconf["part2"]: + # copy the lowest optimized conformer to file coord.enso_best + with open( + os.path.join("CONF" + str(conf.id), config.func, "coord"), + "r", + encoding=CODING, + newline=None, + ) as f: + coord = f.readlines() + with open( + os.path.join(config.cwd, "coord.enso_best"), "w", newline=None + ) as best: + best.write( + "$coord # {} {} !CONF{} \n".format( + conf.free_energy, conf.lowlevel_grrho_info["energy"], conf.id + ) + ) + for line in coord[1:]: + if "$" in line: # stop at $end ... 
def _prep_input(self, xyzfile=False, returndict=False):
    """
    Assemble the lines of the ORCA input file for the current job
    (ORCA analogue of the cefine preparation step).

    Reads from self.job: prepinfo, func, basis (only for non-composite
    functionals), jobtype, omp, solvent, sm, charge, unpaired, workdir
    (only if no xyzfile is given) and the hactive/cactive/factive/
    siactive/pactive flags (only for NMR property jobs).

    use:
    xyzfile --> if set, the geometry is referenced as
                "* xyzfile <charge> <multiplicity> <xyzfile>", otherwise
                the coordinates returned by t2x(workdir) are written inline
    returndict --> if set, the dictionary of input sections is returned
                   in addition to the flat list
    returns
    call --> list with the input lines (settings), in the order of the
             sections of `orcainput`
    """
    job = self.job
    func = job["func"]
    prepinfo = job["prepinfo"]
    nmrprop = "nmr" in prepinfo

    # functional classes; everything that is not listed is treated as
    # (meta-)GGA, e.g. tpss, pbe, kt2, b97-d3
    composite_dfa = ("pbeh-3c", "b97-3c", "b973c", "hf-3c", "hf3c", "r2scan-3c")
    hybriddfa = ("pbe0", "pw6b95", "wb97x-d3")
    dhdfa = ("dsd-blyp",)
    # orbital basis sets for which a matching correlation fitting basis
    # (<basis>/C) is requested; stored lower-case because the comparison
    # below is case-insensitive
    def2cbasis = ("def2-svp", "def2-tzvp", "def2-tzvpp", "def2-qzvpp")

    # insertion order == order of the sections in the written input
    orcainput = OrderedDict(
        (key, None)
        for key in (
            "functional",
            "disp",
            "basis",
            "gcp",
            "RI-approx",
            "grid",
            "scfconv",
            "frozencore",
            "mp2",
            "default",
            "job",
            "optthreshold",
            "parallel",
            "solvation",
            "geom",
            "couplings",
            "shieldings",
        )
    )
    orcainput["default"] = [
        "! smallprint printgap noloewdin",
        "! NOSOSCF",
        "%MaxCore 8000",
        "%output",
        " print[P_BondOrder_M] 1",
        " print[P_Mayer] 1",
        " print[P_basis] 2",
        "end",
    ]

    # set functional
    if func in composite_dfa:
        # composite methods define their own basis set and gCP correction
        orcainput["functional"] = [f"! {func}"]
    else:
        if func == "kt2":
            orcainput["functional"] = [
                "%method",
                " method dft",
                " functional gga_xc_kt2",
                "end",
            ]
        elif func == "dsd-blyp":
            orcainput["functional"] = [f"! ri-{func}"]
        else:
            orcainput["functional"] = [f"! {func}"]
        # set basis set
        orcainput["basis"] = [f"! {job['basis']}"]
        # set gcp:
        if "DOGCP" in prepinfo:
            gcp_keywords = {
                "minis": "MINIS",
                "sv": "SV",
                "6-31g(d)": "631GD",
                "def2-sv(p)": "SV(P)",
                "def2-svp": "SVP",
                "def2-tzvp": "TZ",
            }
            gcp_key = gcp_keywords.get(job["basis"].lower())
            if gcp_key is not None:
                orcainput["gcp"] = [f"! GCP(DFT/{gcp_key})"]

    # set RI def2/J, RIJCOSX def2/J gridx6 NOFINALGRIDX, RIJK def2/JK
    if func in dhdfa:
        orcainput["frozencore"] = ["!NOFROZENCORE"] if nmrprop else ["! Frozencore"]
        basis = str(job["basis"])
        # The former test `basis.upper() in ("Def2-SVP", ...)` could never
        # be true (upper-cased string vs. mixed-case entries), so the
        # matching /C basis was never selected.
        if basis.lower() in def2cbasis:
            orcainput["RI-approx"] = [
                f"! def2/J {basis}/C RIJCOSX GRIDX7 NOFINALGRIDX"
            ]
        else:
            orcainput["RI-approx"] = [
                "! def2/J def2-TZVPP/C RIJCOSX GRIDX7 NOFINALGRIDX"
            ]
        if nmrprop:
            orcainput["mp2"] = ["%mp2", " RI true", " density relaxed", "end"]
        else:
            orcainput["mp2"] = ["%mp2", " RI true", "end"]
    elif func in hybriddfa:
        orcainput["RI-approx"] = ["! def2/J RIJCOSX GRIDX6 NOFINALGRIDX"]
    elif func not in composite_dfa:  # essentially gga
        orcainput["RI-approx"] = ["! RI def2/J"]

    # set grid
    if func in dhdfa or func in hybriddfa:
        orcainput["grid"] = ["! grid5 nofinalgrid"]
    else:
        orcainput["grid"] = ["! grid4 nofinalgrid"]
    # set scfconv or convergence threshold e.g. loosescf or scfconv6
    orcainput["scfconv"] = ["! scfconv6"]

    # the first entry of prepinfo may select a predefined accuracy level
    # which overrules the grid / scf convergence chosen above
    extension = {
        "low": {"grid": ["! grid4 nofinalgrid"], "scfconv": ["! loosescf"]},
        "low+": {"grid": ["! grid4 nofinalgrid"], "scfconv": ["! scfconv6"]},
        "high": {"grid": ["! grid4 nofinalgrid"], "scfconv": ["! scfconv7"]},
        "high+": {"grid": ["! grid5 nofinalgrid"], "scfconv": ["! scfconv7"]},
    }
    if prepinfo and isinstance(prepinfo, list) and prepinfo[0] in extension:
        orcainput["grid"] = extension[prepinfo[0]]["grid"]
        orcainput["scfconv"] = extension[prepinfo[0]]["scfconv"]

    # add dispersion
    if func not in composite_dfa:
        orcainput["disp"] = ["! d3bj"]

    # optimization ancopt or pure orca
    if job["jobtype"] == "xtbopt":
        orcainput["job"] = ["! ENGRAD"]
    elif job["jobtype"] == "opt":
        orcainput["job"] = ["! OPT"]
        # placeholder for thresholds (empty --> nothing is written)
        orcainput["optthreshold"] = []

    # nprocs
    if int(job["omp"]) >= 1:
        orcainput["parallel"] = ["%pal", " nprocs {}".format(job["omp"]), "end"]

    # solvent model; the ORCA specific solvent name is taken from the
    # solvent database
    if job["solvent"] != "gas":
        if job["sm"] in ("smd", "smd_gsolv"):
            smd_name = censo_solvent_db[job["solvent"]]["smd"][1]
            orcainput["solvation"] = [
                "%cpcm",
                " smd true",
                f' smdsolvent "{smd_name}"',
                "end",
            ]
        elif job["sm"] == "cpcm":
            cpcm_name = censo_solvent_db[job["solvent"]]["cpcm"][1]
            orcainput["solvation"] = [f"! CPCM({cpcm_name})"]

    # unpaired, charge, and coordinates (multiplicity = unpaired + 1)
    if xyzfile:
        orcainput["geom"] = [
            f"* xyzfile {job['charge']} {job['unpaired']+1} {str(xyzfile)}"
        ]
    else:
        # xyz geometry
        geom, _ = t2x(job["workdir"])
        orcainput["geom"] = [f"*xyz {job['charge']} {job['unpaired']+1}"]
        orcainput["geom"].extend(geom)
        orcainput["geom"].append("*")

    nuclei = (
        ("hactive", "H"),
        ("cactive", "C"),
        ("factive", "F"),
        ("siactive", "Si"),
        ("pactive", "P"),
    )

    def nuclei_lines(prop):
        """One 'Nuclei' line per element that is flagged active in the job."""
        return [
            f" Nuclei = all {element} {{ {prop} }}"
            for flag, element in nuclei
            if job[flag]
        ]

    # couplings
    if nmrprop and "nmrJ" in prepinfo:
        orcainput["couplings"] = (
            ["%eprnmr"] + nuclei_lines("ssfc") + [" SpinSpinRThresh 8.0", "end"]
        )
    # shielding
    if nmrprop and "nmrS" in prepinfo:
        orcainput["shieldings"] = (
            ["%eprnmr"]
            + nuclei_lines("shift")
            + [
                " origin giao",
                " giao_2el giao_2el_same_as_scf",
                " giao_1el giao_1el_analytic",
                "end",
            ]
        )

    # minimal sanity check, only reported and not raised
    if (
        not orcainput["functional"]
        or (not orcainput["basis"] and func not in composite_dfa)
        or not orcainput["geom"]
    ):
        print("unusable input!")

    call = []
    for value in orcainput.values():
        if value:
            call.extend(value)
    if returndict:
        return call, orcainput
    return call
def _smd_gsolv(self):
    """
    Calculate SMD_gsolv, needs ORCA
    if optimization is not performed with ORCA, only the density
    functional for optimization is employed,
    from my understanding smd is parametrized at 298 K, therefore it should only
    be used at this temperature.

    Two single-points are run through self._sp(): one in the gas phase and
    one with the solvent / solvation model of the job. The "inp" and
    "sp.out" files of each run are renamed to inp_gas / sp_gas.out and
    inp_solv / sp_solv.out.

    On return:
    energy --> gas phase energy (0.0 on failure)
    energy2 --> smd_gsolv gsolv contribution, E(solv) - E(gas) (0.0 on failure)
    success --> True only if both single-points succeeded
    """
    workdir = self.job["workdir"]
    print(f"Running SMD_gsolv calculation in {last_folders(workdir, 2)}.")
    keepsolv = self.job["solvent"]
    keepsm = self.job["sm"]
    # (file tag, solvent, solvation model, name used in the error message)
    phases = (
        ("gas", "gas", "gas-phase", "gas phase"),
        ("solv", keepsolv, keepsm, "solution phase"),
    )
    energies = {}
    try:
        for tag, solvent, sm, label in phases:
            self.job["solvent"] = solvent
            self.job["sm"] = sm
            # _sp() never resets a stale True, so without this reset a
            # failed solution-phase run would inherit the success of the
            # gas-phase run
            self.job["success"] = False
            self._sp(silent=True)
            if not self.job["success"]:
                self.job["energy"] = 0.0
                self.job["energy2"] = 0.0
                print(
                    f"ERROR: in {label} single-point "
                    f"of {last_folders(workdir, 2):18}"
                )
                return
            energies[tag] = self.job["energy"]
            self.job["energy"] = 0.0
            # mv inp inp_<tag>; mv sp.out sp_<tag>.out
            for src, dst in (("inp", f"inp_{tag}"), ("sp.out", f"sp_{tag}.out")):
                try:
                    shutil.move(
                        os.path.join(workdir, src), os.path.join(workdir, dst)
                    )
                except FileNotFoundError:
                    # best effort: keeping the files is only for bookkeeping
                    pass
    finally:
        # the job settings must describe the solvated system again, also
        # when returning early after a failed gas phase calculation
        self.job["solvent"] = keepsolv
        self.job["sm"] = keepsm

    self.job["energy"] = energies["gas"]
    self.job["energy2"] = energies["solv"] - energies["gas"]
    self.job["success"] = True
    return
+ ) + newcoord.write("$end") + + with open( + os.path.join(self.job["workdir"], "inp"), "w", newline=None + ) as inp: + for line in self._prep_input(xyzfile="inp.xyz"): + inp.write(line + "\n") + # Done writing input! + callargs = [ + self.job["xtb_driver_path"], + "coord", + "--opt", + self.job["optlevel"], + "--orca", + ] + with open( + os.path.join(self.job["workdir"], "opt.inp"), "w", newline=None + ) as out: + out.write("$opt \n") + if ( + self.job["optcycles"] is not None + and float(self.job["optcycles"]) > 0 + ): + out.write(f"maxcycle={str(self.job['optcycles'])} \n") + out.write(f"microcycle={str(self.job['optcycles'])} \n") + out.write("average conv=true \n") + out.write(f"hlow={self.job.get('hlow', 0.01)} \n") + out.write("s6=30.00 \n") + # remove unnecessary sp/gradient call in xTB + out.write("engine=lbfgs\n") + out.write("$end \n") + callargs.append("-I") + callargs.append("opt.inp") + time.sleep(0.02) + with open( + os.path.join(self.job["workdir"], output), "w", newline=None + ) as outputfile: + returncode = subprocess.call( + callargs, + shell=False, + stdin=None, + stderr=subprocess.STDOUT, + universal_newlines=False, + cwd=self.job["workdir"], + stdout=outputfile, + env=ENVIRON, + ) + if returncode != 0: + error_logical = True + print( + "ERROR: optimization in {:18} not converged".format( + last_folders(self.job["workdir"], 2) + ) + ) + time.sleep(0.02) + # check if optimization finished correctly: + if os.path.isfile(os.path.join(self.job["workdir"], output)): + with open( + os.path.join(self.job["workdir"], output), + "r", + encoding=CODING, + newline=None, + ) as inp: + stor = inp.readlines() + for line in stor: + if ( + "external code error" in line + or "|grad| > 500, something is totally wrong!" 
def _nmrS(self):
    """
    ORCA NMR shielding constant calculation

    Writes the input "inpS" from self._prep_input(), runs ORCA on it with
    the output redirected to "orcaS.out" and sets self.job["success"]
    depending on whether ORCA reports a normal termination.
    """
    workdir = self.job["workdir"]
    outpath = os.path.join(workdir, "orcaS.out")

    # generate input
    with open(os.path.join(workdir, "inpS"), "w", newline=None) as inp:
        inp.writelines(f"{line}\n" for line in self._prep_input())

    # shielding calculation
    print(f"Running shielding calculation in {last_folders(workdir, 2):18}")
    with open(outpath, "w", newline=None) as outputfile:
        subprocess.call(
            [os.path.join(self.job["progpath"], "orca"), "inpS"],
            shell=False,
            stdin=None,
            stderr=subprocess.STDOUT,
            universal_newlines=False,
            cwd=workdir,
            stdout=outputfile,
        )
    time.sleep(0.1)

    # check if calculation was successful:
    with open(outpath, "r", encoding=CODING, newline=None) as out:
        store = out.readlines()
    self.job["success"] = any("ORCA TERMINATED NORMALLY" in line for line in store)
    if not self.job["success"]:
        print(f"ERROR: shielding calculation in {last_folders(workdir, 1):18} failed!")
    return
def _genericoutput(self):
    """
    ORCA read shielding and coupling constants and write them to plain output

    Parses "orcaS.out" (shielding summary table) and "orcaJ.out" (spin-spin
    coupling section) in the job's workdir and writes "nmrprop.dat":
    one "<atom> <sigma>" line per shielding sorted by atom number, blank
    lines up to self.job["nat"] lines, then one "<atom1> <atom2> <J>" line
    per coupling. Atom numbers are written 1-based (ORCA index + 1).

    A missing output file, or an output without the respective section, is
    reported / skipped and simply contributes no data; self.job["success"]
    is set to True in every case, as it effectively was before.
    """
    workdir = self.job["workdir"]

    def read_output(fname):
        """Return the lines of fname in workdir, or None if it is missing."""
        try:
            with open(
                os.path.join(workdir, fname), "r", encoding=CODING, newline=None
            ) as inp:
                return inp.readlines()
        except FileNotFoundError:
            print("Missing file: {} in {}".format(fname, last_folders(workdir, 2)))
            return None

    def first_index(lines, marker, default=None):
        """Index of the first line containing marker, else default."""
        return next((i for i, line in enumerate(lines) if marker in line), default)

    # shielding constants: the table starts 6 lines below its headline and
    # ends at the first line that does not consist of 4 columns
    shieldings = []
    data = read_output("orcaS.out")
    if data is not None:
        start = first_index(data, "CHEMICAL SHIELDING SUMMARY (ppm)")
        if start is not None:
            for line in data[start + 6 :]:
                splitted = line.split()
                if len(splitted) != 4:
                    break
                shieldings.append((int(splitted[0]) + 1, float(splitted[2])))

    # coupling constants: everything between the section headline and the
    # termination message (or the end of the file if ORCA did not finish)
    atom1 = []
    atom2 = []
    jab = []
    data = read_output("orcaJ.out")
    if data is not None:
        start = first_index(data, "NMR SPIN-SPIN COUPLING CONSTANTS")
        end = first_index(data, " ****ORCA TERMINATED NORMALLY****", len(data))
        if start is not None:
            for line in data[start + 6 : end]:
                if "NUCLEUS" in line:
                    tmpsplitted = line.split()
                    atom1.append(int(tmpsplitted[4]) + 1)
                    atom2.append(int(tmpsplitted[9]) + 1)
                elif "Total" in line and "iso= " in line:
                    jab.append(float(line.split()[5]))

    self.job["success"] = True
    with open(os.path.join(workdir, "nmrprop.dat"), "w", newline=None) as out:
        # sorting the pairs also works for an empty list, unlike the
        # former unzip via zip(*...)
        shieldings.sort()
        for number, sigma in shieldings:
            out.write("{:{digits}} {}\n".format(number, sigma, digits=4))
        for _ in range(self.job["nat"] - len(shieldings)):
            out.write("\n")
        # zip: a pair without a parsed coupling value is not written
        for first, second, coupling in zip(atom1, atom2, jab):
            out.write(
                "{:{digits}} {:{digits}} {}\n".format(
                    first, second, coupling, digits=4
                )
            )
    time.sleep(0.02)
    return
def execute_data(q, resultq):
    """
    code that the worker has to execute

    Takes tasks from q until it is empty, calls task.execute() and puts
    every task exactly once on resultq, followed by exactly one
    q.task_done().

    If execute() raises, the exception is printed and stored on the task
    as `hugeERROR` together with the formatted traceback as `tb`, so that
    the consumer of resultq can report / re-raise it. The worker then
    continues with the next task.

    q --> queue of tasks (needs empty(), get() and task_done())
    resultq --> queue the processed tasks are put on
    """
    while not q.empty():
        task = q.get()
        try:
            task.execute()
        except Exception as e:
            # Formerly the next task was additionally dequeued here, put on
            # resultq twice without being executed, and task_done() was
            # called once too often.
            print(e)
            task.hugeERROR = e
            task.tb = traceback.format_exc()
        resultq.put(task)
        time.sleep(0.02)
        q.task_done()
    time.sleep(0.02)
OrcaJob) and job == TmJob: + item.__class__ = job + elif isinstance(item, QmJob) and job != QmJob: + item.__class__ = job + item.job["workdir"] = os.path.normpath( + os.path.join(config.cwd, "CONF" + str(item.id), foldername) + ) + # update instructions + item.job.update(instructdict) + # put item on queue + q.put(item) + time.sleep(0.02) + time.sleep(0.02) + njobs = q.qsize() + if instructdict.get("onlyread", False): + print(f"\nReading data from {njobs} conformers calculated in " "previous run.") + else: + response = { + "prep": f"\nPreparing {q.qsize()} calculations.", + "sp": f"\nStarting {q.qsize()} single-point calculations.", + "xtb_sp": f"\nStarting {q.qsize()} xTB - single-point calculations.", + "lax_sp": f"\nStarting {q.qsize()} lax-single-point calculations.", + "cosmors": f"\nStarting {q.qsize()} COSMO-RS-Gsolv calculations.", + "gbsa_gsolv": f"\nStarting {q.qsize()} GBSA-Gsolv calculations", + "alpb_gsolv": f"\nStarting {q.qsize()} ALPB-Gsolv calculations", + "smd_gsolv": f"\nStarting {q.qsize()} SMD-Gsolv calculations", + "rrhoxtb": f"\nStarting {q.qsize()} G_RRHO calculations.", + "rrhoorca": f"\nStarting {q.qsize()} G_RRHO calculations.", + "rrhotm": f"\nStarting {q.qsize()} G_RRHO calculations.", + "opt": f"\nStarting {q.qsize()} optimizations.", + "xtbopt": f"\nStarting {q.qsize()} optimizations.", + "couplings": f"\nStarting {q.qsize()} coupling constants calculations", + "couplings_sp": f"\nStarting {q.qsize()} coupling constants calculations", + "shieldings": f"\nStarting {q.qsize()} shielding constants calculations", + "shieldings_sp": f"\nStarting {q.qsize()} shielding constants calculations", + "genericoutput": f"\nWriting {q.qsize()} generic outputs.", + "opt-rot": f"\nStarting {q.qsize()} optical-rotation calculations.", + "opt-rot_sp": f"\nStarting {q.qsize()} optical-rotation calculations.", + } + if instructdict["jobtype"] in response: + print(response[instructdict["jobtype"]]) + + # start working in parallel + for _ in 
range(int(maxthreads)): + worker = Process(target=execute_data, args=(q, resultq)) + worker.daemon = True + worker.start() + # NOBODY IS ALLOWED TO TOUCH THIS SLEEP STATEMENT!!!! + time.sleep(0.05) # sleep is important don't remove it!!! + # seriously don't remove it! + q.join() + + if not instructdict.get("onlyread", False): + print("Tasks completed!\n") + else: + print("Reading data from previous run completed!\n") + + # Get results + results = [] + while not resultq.empty(): + results.append(resultq.get()) + if getattr(results[-1], "hugeERROR", False): + print(getattr(results[-1], "tb")) + raise getattr(results[-1], "hugeERROR") + time.sleep(0.01) # sleep is important don't remove it!!! + # seriously don't remove it! + + time.sleep(0.02) + results.sort(key=lambda x: int(x.id)) + if njobs != len(results): + print(f"ERROR some conformers were lost!") + return results diff --git a/censo_qm/prescreening.py b/censo_qm/prescreening.py new file mode 100755 index 0000000..dfaa84a --- /dev/null +++ b/censo_qm/prescreening.py @@ -0,0 +1,1076 @@ +""" +prescreening == part1, calculate free energy on GFNn-xTB input geometry +idea is to improve on E and (Gsolv) +""" +import os +import sys +import math +from multiprocessing import JoinableQueue as Queue +from .cfg import PLENGTH, DIGILEN, AU2KCAL, CODING, censo_solvent_db +from .parallel import run_in_parallel +from .orca_job import OrcaJob +from .tm_job import TmJob +from .utilities import ( + check_for_folder, + print_block, + new_folders, + last_folders, + ensemble2coord, + printout, + move_recursively, + write_trj, + check_tasks, + calc_std_dev, + spearman, + print, + calc_boltzmannweights, +) + + +def part1(config, conformers, store_confs, ensembledata): + """ + Prescreening of the ensemble, with single-points on combined ensemble + geometries. + Calculate low level free energies with COSMO-RS single-point and gsolv + contribution and GFNFF-bhess thermostatistical contribution. 
+ Input: + - config [conifg_setup object] contains all settings + - conformers [list of molecule_data objects] each conformer is represented + - ensembledata -> instance for saving ensemble (not conf) related data + Return: + -> config + -> conformers + -> store_confs + """ + save_errors = [] + print("\n" + "".ljust(PLENGTH, "-")) + print("CRE PRESCREENING - PART1".center(PLENGTH, " ")) + print("".ljust(PLENGTH, "-") + "\n") + # print flags for part1 + info = [] + info.append(["prog", "program"]) + info.append(["func", "functional for part1 and 2"]) + info.append(["basis", "basis set for part1 and 2"]) + if config.solvent != "gas": + info.append(["solvent", "Solvent"]) + info.append(["smgsolv1", "solvent model for Gsolv contribution"]) + info.append(["part1_threshold", "threshold"]) + info.append( + ["printoption", "starting number of considered conformers", len(conformers)] + ) + info.append(["evaluate_rrho", "calculate mRRHO contribution"]) + if config.evaluate_rrho: + info.append(["prog_rrho", "program for mRRHO contribution"]) + if config.prog_rrho == "xtb": + info.append(["part1_gfnv", "GFN version for mRRHO and/or GBSA_Gsolv"]) + if config.bhess: + info.append( + [ + "bhess", + "Apply constraint to input geometry during mRRHO calculation", + ] + ) + info.append(["temperature", "temperature"]) + optionsexchange = {True: "on", False: "off"} + for item in info: + if item[0] == "justprint": + print(item[1:][0]) + else: + if item[0] == "printoption": + option = item[2] + else: + option = getattr(config, item[0]) + if option is True or option is False: + option = optionsexchange[option] + elif isinstance(option, list): + option = ", ".join(option) + print( + "{}: {:{digits}} {}".format( + item[1], "", option, digits=DIGILEN - len(item[1]) + ) + ) + print("") + # end print + + calculate = [] # has to be calculated in this run + prev_calculated = [] # was already calculated in a previous run + try: + store_confs + except NameError: + store_confs = [] # stores all 
confs which are sorted out! + + if config.solvent == "gas": + print("Calculating single-point energies:") + else: + print("Calculating single-point energies and solvation contribution (G_solv):") + + # setup queues + q = Queue() + resultq = Queue() + + if config.prog == "tm": + job = TmJob + elif config.prog == "orca": + job = OrcaJob + + for conf in list(conformers): + if conf.removed: + store_confs.append(conformers.pop(conformers.index(conf))) + print(f"CONF{conf.id} is removed as requested by the user.") + continue + if conf.id > config.nconf: + store_confs.append(conformers.pop(conformers.index(conf))) + continue + if conf.prescreening_sp_info["info"] == "not_calculated": + conf = conformers.pop(conformers.index(conf)) + calculate.append(conf) + elif conf.prescreening_sp_info["info"] == "failed": + conf = conformers.pop(conformers.index(conf)) + store_confs.append(conf) + print(f"Calculation of CONF{conf.id} failed in the previous run.") + elif conf.prescreening_sp_info["info"] == "calculated": + conf = conformers.pop(conformers.index(conf)) + if config.solvent != "gas": + # check if solvation calculation calculated as well! + if conf.prescreening_gsolv_info["info"] == "failed": + store_confs.append(conf) + print( + f"Calculation of the solvation contribution for CONF" + f"{conf.id} failed in the previous run." 
+ ) + elif ( + conf.prescreening_gsolv_info["info"] == "not_calculated" + and config.smgsolv1 in config.smgsolv_1 + ): + # additive solvation + calculate.append(conf) + else: + # implicit solvation + conf.job["success"] = True + prev_calculated.append(conf) + elif config.solvent == "gas": + conf.job["success"] = True + prev_calculated.append(conf) + if not calculate and not prev_calculated: + print("ERROR: No conformers left!") + if prev_calculated: + check_for_folder(config.cwd, [i.id for i in prev_calculated], config.func) + print("The prescreening_single-point was calculated before for:") + print_block(["CONF" + str(i.id) for i in prev_calculated]) + pl = config.lenconfx + 4 + len(str("/" + config.func)) + + instruction = { + "prepinfo": ["low+"], # TM: m4 scfconv 6 + "func": config.func, + "basis": getattr( + config, "basis", config.func_basis_default.get(config.func, "def2-mTZVPP") + ), + "charge": config.charge, + "unpaired": config.unpaired, + "solvent": config.solvent, + "sm": config.smgsolv1, + "omp": config.omp, + "temperature": config.temperature, + "gfn_version": config.part1_gfnv, + "energy": 0.0, + "energy2": 0.0, + "success": False, + } + + if config.solvent == "gas": + instruction["jobtype"] = "sp" + instruction["method"], _ = config.get_method_name( + "sp", func=instruction["func"], basis=instruction["basis"] + ) + name = "prescreening_single-point" + folder = instruction["func"] + if config.prog == "orca": + instruction["progpath"] = config.external_paths["orcapath"] + else: + if config.smgsolv1 in config.smgsolv_1: + # additive Gsolv + # COSMORS + if config.smgsolv1 != "dcosmors" and "cosmors" in config.smgsolv1: + job = TmJob + exc_fine = {"cosmors": "normal", "cosmors-fine": "fine"} + tmp = { + "jobtype": "cosmors", + "cosmorssetup": config.external_paths["cosmorssetup"], + "cosmorsparam": exc_fine.get(config.smgsolv1, "normal"), + "cosmothermversion": config.external_paths["cosmothermversion"], + } + instruction.update(tmp) + 
instruction["method"], instruction["method2"] = config.get_method_name( + "cosmors", + func=instruction["func"], + basis=instruction["basis"], + sm=instruction["sm"], + ) + name = "prescreening COSMO-RS" + folder = str(instruction["func"]) + "/COSMO" + # GBSA-Gsolv / ALPB-Gsolv + elif instruction["sm"] in ("gbsa_gsolv", "alpb_gsolv"): + # do DFT gas phase sp and additive Gsolv + instruction["jobtype"] = instruction["sm"] + if config.prog == "orca": + instruction["progpath"] = config.external_paths["orcapath"] + instruction["xtb_driver_path"] = config.external_paths["xtbpath"] + instruction["method"], instruction["method2"] = config.get_method_name( + instruction["jobtype"], + func=instruction["func"], + basis=instruction["basis"], + sm=instruction["sm"], + gfn_version=instruction["gfn_version"], + ) + if ( + conf.prescreening_sp_info["info"] == "calculated" + and conf.prescreening_sp_info["method"] == instruction["method"] + ): + # do not calculate gas phase sp again! + instruction["energy"] = conf.prescreening_sp_info["energy"] + instruction["prepinfo"] = [] + name = "prescreening " + str(instruction["sm"]).upper() + folder = str(instruction["func"]) + "/Gsolv" + # SMD-Gsolv + elif instruction["sm"] == "smd_gsolv": + job = OrcaJob + instruction["jobtype"] = instruction["sm"] + instruction["progpath"] = config.external_paths["orcapath"] + instruction["method"], instruction["method2"] = config.get_method_name( + "smd_gsolv", + func=instruction["func"], + basis=instruction["basis"], + sm=instruction["sm"], + ) + name = "prescreening" + str(instruction["sm"]).upper() + folder = str(instruction["func"]) + "/Gsolv" + else: + # with implicit solvation + instruction["jobtype"] = "sp_implicit" + # instruction["prepinfo"] = ["low+"] + if config.prog == "orca": + instruction["progpath"] = config.external_paths["orcapath"] + instruction["method"], instruction["method2"] = config.get_method_name( + "sp_implicit", + func=instruction["func"], + basis=instruction["basis"], + 
sm=instruction["sm"], + ) + name = "prescreening_single-point" + folder = instruction["func"] + + check = {True: "was successful", False: "FAILED"} + if calculate: + print(f"The {name} is calculated for:") + print_block(["CONF" + str(i.id) for i in calculate]) + # create folders: + save_errors, store_confs, calculate = new_folders( + config.cwd, calculate, config.func, save_errors, store_confs + ) + # write coord to folder + calculate, store_confs, save_errors = ensemble2coord( + config, config.func, calculate, store_confs, save_errors + ) + if config.solvent != "gas": + if folder != str(config.func): + # create the COSMO folder + save_errors, store_confs, calculate = new_folders( + config.cwd, calculate, folder, save_errors, store_confs + ) + # write the coord file to the COSMO folder + calculate, store_confs, save_errors = ensemble2coord( + config, folder, calculate, store_confs, save_errors + ) + + # parallel calculation: + calculate = run_in_parallel( + config, q, resultq, job, config.maxthreads, calculate, instruction, folder + ) + + for conf in list(calculate): + if instruction["jobtype"] == "sp": + line = ( + f"{name} calculation {check[conf.job['success']]}" + f" for {last_folders(conf.job['workdir'], 2):>{pl}}: " + f"{conf.job['energy']:>.8f}" + ) + print(line) + if not conf.job["success"]: + save_errors.append(line) + conf.prescreening_sp_info["info"] = "failed" + conf.prescreening_sp_info["method"] = conf.job["method"] + store_confs.append(calculate.pop(calculate.index(conf))) + else: + conf.prescreening_sp_info["energy"] = conf.job["energy"] + conf.prescreening_sp_info["info"] = "calculated" + conf.prescreening_sp_info["method"] = conf.job["method"] + elif instruction["jobtype"] == "sp_implicit": + line = ( + f"{name} calculation {check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 2):>{pl}}: " + f"{conf.job['energy']:>.8f}" + ) + print(line) + if not conf.job["success"]: + save_errors.append(line) + 
conf.prescreening_sp_info["info"] = "failed" + conf.prescreening_sp_info["method"] = conf.job["method"] + store_confs.append(calculate.pop(calculate.index(conf))) + else: + conf.prescreening_sp_info["energy"] = conf.job["energy"] + conf.prescreening_sp_info["info"] = "calculated" + conf.prescreening_sp_info["method"] = conf.job["method"] + elif instruction["jobtype"] in ( + "cosmors", + "smd_gsolv", + "gbsa_gsolv", + "alpb_gsolv", + ): + line = ( + f"{name} calculation {check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 3):>{pl}}: " + f"{conf.job['energy2']:>.8f}" + ) + print(line) + if not conf.job["success"]: + save_errors.append(line) + conf.prescreening_sp_info["info"] = "failed" + conf.prescreening_sp_info["method"] = conf.job["method"] + conf.prescreening_gsolv_info["info"] = "failed" + conf.prescreening_gsolv_info["method"] = conf.job["method2"] + store_confs.append(calculate.pop(calculate.index(conf))) + else: + conf.prescreening_sp_info["energy"] = conf.job["energy"] + conf.prescreening_sp_info["info"] = "calculated" + conf.prescreening_sp_info["method"] = conf.job["method"] + conf.prescreening_gsolv_info["gas-energy"] = conf.job["energy"] + conf.prescreening_gsolv_info["energy"] = conf.job["energy2"] + conf.prescreening_gsolv_info["info"] = "calculated" + conf.prescreening_gsolv_info["method"] = conf.job["method2"] + else: + print( + f'UNEXPECTED BEHAVIOUR: {conf.job["success"]} {conf.job["jobtype"]}' + ) + # save current data to jsonfile + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + check_tasks(calculate, config.check) + else: + print("No conformers are considered additionally.") + if prev_calculated: + # adding conformers calculated before: + for conf in list(prev_calculated): + conf.job["workdir"] = os.path.normpath( + os.path.join(config.cwd, "CONF" 
+ str(conf.id), config.func) + ) + if instruction["jobtype"] in ("sp", "sp_implicit"): + print( + f"{name} calculation {check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 2):>{pl}}: " + f"{conf.prescreening_sp_info['energy']:>.8f}" + ) + elif instruction["jobtype"] in ( + "cosmors", + "smd_gsolv", + "gbsa_gsolv", + "alpb_gsolv", + ): + print( + f"{name} calculation {check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 3):>{pl}}: " + f"{conf.prescreening_gsolv_info['energy']:>.8f}" + ) + calculate.append(prev_calculated.pop(prev_calculated.index(conf))) + for conf in calculate: + conf.reset_job_info() + if not calculate: + print("ERROR: No conformers left!") + print("Going to exit!") + sys.exit(1) + + # *************************************************************************** + # first sorting by E or Gsolv + # (remove high lying conformers above part1_threshold + 1.5 kcal/mol) + print("\n" + "".ljust(int(PLENGTH / 2), "-")) + print("Removing high lying conformers".center(int(PLENGTH / 2), " ")) + print("".ljust(int(PLENGTH / 2), "-") + "\n") + + for conf in calculate: + rrho = None + if config.solvent == "gas": + solv = None + else: + solv = "prescreening_gsolv_info" + e = "prescreening_sp_info" + conf.calc_free_energy(e=e, solv=solv, rrho=rrho) + try: + minfree = min([i.free_energy for i in calculate if i is not None]) + except ValueError: + raise + for conf in calculate: + conf.rel_free_energy = (conf.free_energy - minfree) * AU2KCAL + try: + maxreldft = max([i.rel_free_energy for i in calculate if i is not None]) + except ValueError: + print("ERROR: No conformer left or Error in maxreldft!") + # print sorting + columncall = [ + lambda conf: "CONF" + str(getattr(conf, "id")), + lambda conf: getattr(conf, "xtb_energy"), + lambda conf: getattr(conf, "rel_xtb_energy"), + lambda conf: getattr(conf, "prescreening_sp_info")["energy"], + lambda conf: getattr(conf, "prescreening_gsolv_info")["energy"], + lambda conf: 
getattr(conf, "free_energy"), + lambda conf: getattr(conf, "rel_free_energy"), + ] + columnheader = [ + "CONF#", + "E(GFNn-xTB)", + "ΔE(GFNn-xTB)", + "E [Eh]", + "Gsolv [Eh]", + "Gtot", + "ΔGtot", + ] + columndescription = ["", "[a.u.]", "[kcal/mol]", "", "", "[Eh]", "[kcal/mol]"] + columndescription2 = ["", "", "", "", "", "", "", ""] + columnformat = ["", (12, 7), (5, 2), (12, 7), (12, 7), (12, 7), (5, 2)] + + if config.solvent == "gas": + columnheader[5] = "Etot" + columnheader[6] = "ΔEtot" + columndescription[3] = instruction["method"] + # ignore gsolv in printout + columncall.pop(4) + columnheader.pop(4) + columndescription.pop(4) + columnformat.pop(4) + elif config.solvent != "gas": + # energy + columndescription[3] = instruction["method"] + # gsolv + columndescription[4] = instruction["method2"] + + calculate.sort(key=lambda x: int(x.id)) + printout( + os.path.join(config.cwd, "part1preG.dat"), + columncall, + columnheader, + columndescription, + columnformat, + calculate, + minfree, + columndescription2=columndescription2, + ) + + if maxreldft > (config.part1_threshold): + print("\n" + "".ljust(int(PLENGTH / 2), "-")) + print("Conformers considered further".center(int(PLENGTH / 2), " ")) + print("".ljust(int(PLENGTH / 2), "-") + "\n") + for conf in list(calculate): + if conf.rel_free_energy > (config.part1_threshold): + store_confs.append(calculate.pop(calculate.index(conf))) + if calculate: + print(f"Below the threshold of {config.part1_threshold} kcal/mol.\n") + print_block(["CONF" + str(i.id) for i in calculate]) + else: + print("Error: There are no more conformers left!") + else: + print( + "\nAll relative (free) energies are below the threshold " + f"of ({config.part1_threshold} kcal/mol.\nAll conformers are " + "considered further." 
+ ) + ensembledata.nconfs_per_part["part1_firstsort"] = len(calculate) + # reset + for conf in calculate: + conf.free_energy = 0.0 + conf.rel_free_energy = None + print("".ljust(int(PLENGTH / 2), "-")) + # *************************************************************************** + if config.evaluate_rrho: + # check if prescreening rrho has been calculated + if config.solvent == "gas": + print("\nCalculating prescreening G_mRRHO!") + else: + print("\nCalculating prescreening G_mRRHO with implicit solvation!") + + for conf in list(calculate): + if conf.prescreening_grrho_info["info"] == "not_calculated": + pass + elif conf.prescreening_grrho_info["info"] == "failed": + conf = calculate.pop(calculate.index(conf)) + conf.__class__ = job + store_confs.append(conf) + print(f"Calculation of CONF{conf.id} failed in the previous run!") + elif conf.prescreening_grrho_info["info"] == "calculated": + conf = calculate.pop(calculate.index(conf)) + conf.__class__ = job + conf.job["success"] = True + prev_calculated.append(conf) + + if not calculate and not prev_calculated: + print("ERROR: No conformers left!") + print("Going to exit!") + sys.exit(1) + folderrho = "rrho_part1" + if prev_calculated: + check_for_folder(config.cwd, [i.id for i in prev_calculated], folderrho) + print("The prescreening G_mRRHO calculation was performed before for:") + print_block(["CONF" + str(i.id) for i in prev_calculated]) + pl = config.lenconfx + 4 + len(str("/" + folderrho)) + instruction_prerrho = { + "jobtype": "rrhoxtb", + "func": getattr(config, "part1_gfnv"), + "gfn_version": getattr(config, "part1_gfnv"), + "temperature": config.temperature, + "charge": config.charge, + "unpaired": config.unpaired, + "solvent": config.solvent, + "omp": config.omp, + "progpath": config.external_paths["xtbpath"], + "bhess": config.bhess, + "consider_sym": config.consider_sym, + "sm_rrho": config.sm_rrho, + "rmsdbias": config.rmsdbias, + "cwd": config.cwd, + "copymos": "", + "sym": "c1", + "multiTemp": False, 
+ "energy": 0.0, + "energy2": 0.0, + "success": False, + } + + instruction_prerrho["method"], _ = config.get_method_name( + "rrhoxtb", + bhess=config.bhess, + gfn_version=instruction_prerrho["gfn_version"], + sm=instruction_prerrho["sm_rrho"], + solvent=instruction_prerrho["solvent"], + ) + if calculate: + print("The prescreening G_mRRHO calculation is now performed for:") + print_block(["CONF" + str(i.id) for i in calculate]) + # create folders: + save_errors, store_confs, calculate = new_folders( + config.cwd, calculate, folderrho, save_errors, store_confs + ) + # write coord to folder + calculate, store_confs, save_errors = ensemble2coord( + config, folderrho, calculate, store_confs, save_errors + ) + calculate = run_in_parallel( + config, + q, + resultq, + job, + config.maxthreads, + calculate, + instruction_prerrho, + folderrho, + ) + check = {True: "was successful", False: "FAILED"} + # check if too many calculations failed + + ### + for conf in list(calculate): + print( + f"The prescreening G_mRRHO calculation @ {conf.job['symmetry']} " + f"{check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 2):>{pl}}: " + f"{conf.job['energy']:>.8f}" + ) + if not conf.job["success"]: + conf.prescreening_grrho_info["info"] = "failed" + store_confs.append(calculate.pop(calculate.index(conf))) + else: + conf.sym = conf.job["symmetry"] + conf.prescreening_grrho_info["rmsd"] = conf.job["rmsd"] + conf.prescreening_grrho_info["energy"] = conf.job["energy"] + conf.prescreening_grrho_info["info"] = "calculated" + conf.prescreening_grrho_info["method"] = instruction_prerrho[ + "method" + ] + + # save current data to jsonfile + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + check_tasks(calculate, config.check) + else: + print("No conformers are considered additionally.") + # adding 
conformers calculated before: + if prev_calculated: + for conf in list(prev_calculated): + conf.job["workdir"] = os.path.normpath( + os.path.join(config.cwd, "CONF" + str(conf.id), folderrho) + ) + print( + f"The prescreening G_mRRHO calculation @ {conf.sym} " + f"{check[conf.job['success']]} for " + f"{last_folders(conf.job['workdir'], 2):>{pl}}: " + f"{conf.prescreening_grrho_info['energy']:>.8f}" + ) + calculate.append(prev_calculated.pop(prev_calculated.index(conf))) + if not calculate: + print("ERROR: No conformers left!") + print("Going to exit!") + sys.exit(1) + + # save current data to jsonfile + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + + # # printout for part1 ------------------------------------------------------- + print("\n" + "".ljust(int(PLENGTH / 2), "-")) + print("* Gibbs free energies of part1 *".center(int(PLENGTH / 2), " ")) + print("".ljust(int(PLENGTH / 2), "-") + "\n") + columncall = [ + lambda conf: "CONF" + str(getattr(conf, "id")), + lambda conf: getattr(conf, "xtb_free_energy"), + lambda conf: getattr(conf, "rel_xtb_free_energy"), + lambda conf: getattr(conf, "prescreening_sp_info")["energy"], + lambda conf: getattr(conf, "prescreening_gsolv_info")["energy"], + lambda conf: getattr(conf, "prescreening_grrho_info")["energy"], + lambda conf: getattr(conf, "free_energy"), + lambda conf: getattr(conf, "rel_free_energy"), + ] + columnheader = [ + "CONF#", + "G(GFNn-xTB)", + "ΔG(GFNn-xTB)", + "E [Eh]", + "Gsolv [Eh]", + "GmRRHO [Eh]", + "Gtot", + "ΔGtot", + ] + columndescription = [ + "", # CONFX + "[a.u.]", # xtb energy + "[kcal/mol]", # rel xtb_energy + str(config.func), # E + "", # GSolv + "", + "[Eh]", # Gtot + "[kcal/mol]", # rel Gtot + ] + columndescription2 = ["", "", "", "", "", "", "", ""] + columnformat = ["", (12, 7), (5, 2), (12, 7), (12, 7), (12, 
7), (12, 7), (5, 2)] + if config.solvent == "gas": + # Energy + columndescription[3] = instruction["method"] + elif config.solvent != "gas": + # Energy + columndescription[3] = instruction["method"] + # Gsolv + columndescription[4] = instruction["method2"] + if config.evaluate_rrho: + columndescription[5] = instruction_prerrho["method"] # Grrho + + if not config.evaluate_rrho or config.solvent == "gas": + if not config.evaluate_rrho: + # ignore rrho in printout + columncall.pop(5) + columnheader.pop(5) + columndescription.pop(5) + columnformat.pop(5) + if config.solvent == "gas": + columncall.pop(4) + columnheader.pop(4) + columndescription.pop(4) + columnformat.pop(4) + + for conf in calculate: + if not config.evaluate_rrho: + rrho = None + else: + rrho = "prescreening_grrho_info" + if config.solvent == "gas": + solv = None + else: + solv = "prescreening_gsolv_info" + conf.calc_free_energy(e="prescreening_sp_info", solv=solv, rrho=rrho) + conf.xtb_free_energy = conf.calc_free_energy( + e="xtb_energy", solv=None, rrho=rrho, out=True + ) + + try: + minfree = min([i.free_energy for i in calculate if i is not None]) + minfree_xtb = min([i.xtb_free_energy for i in calculate if i is not None]) + except ValueError: + raise ValueError + for conf in calculate: + conf.rel_free_energy = (conf.free_energy - minfree) * AU2KCAL + conf.rel_xtb_free_energy = (conf.xtb_free_energy - minfree_xtb) * AU2KCAL + try: + maxreldft = max([i.rel_free_energy for i in calculate if i is not None]) + except ValueError: + print("ERROR: No conformer left or Error in maxreldft!") + # print sorting + calculate.sort(key=lambda x: int(x.id)) + printout( + os.path.join(config.cwd, "part1.dat"), + columncall, + columnheader, + columndescription, + columnformat, + calculate, + minfree, + columndescription2=columndescription2, + ) + # -------------------------------------------------------------------------- + for conf in calculate: + if conf.free_energy == minfree: + ensembledata.bestconf["part1"] = 
conf.id + + # write to enso.json + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + + # *************************************************************************** + # Fuzzy or smart sorting + # increase the individual threshold for conformers with GRRHO differing from + # the mean GmRRHO + if len(calculate) == 1: + std_dev = 0.0 + else: + std_dev = calc_std_dev( + [ + conf.prescreening_grrho_info["energy"] * AU2KCAL + for conf in calculate + if conf.prescreening_grrho_info["energy"] is not None + ] + ) + max_fuzzy = 1 + fuzzythr = max_fuzzy * (1 - math.exp(-1 * 5 * (std_dev ** 2))) + print( + "\nAdditional global 'fuzzy-threshold' based on the standard deviation of (G_mRRHO):" + ) + print(f"Std_dev(G_mRRHO) = {std_dev:.3f} kcal/mol") + print(f"Fuzzythreshold = {fuzzythr:.3f} kcal/mol") + print( + f"Final sorting threshold = {config.part1_threshold:.3f} + " + f"{fuzzythr:.3f} = {config.part1_threshold + fuzzythr:.3f} kcal/mol" + ) + for conf in calculate: + conf.prescreening_grrho_info["fuzzythr"] = fuzzythr + + # spearman between DFT and DFT + RRHO + if config.evaluate_rrho and len(calculate) > 1: + for conf in calculate: + rrho = None + if config.solvent == "gas": + solv = None + else: + solv = "prescreening_gsolv_info" + e = "prescreening_sp_info" + conf.calc_free_energy(e=e, solv=solv, rrho=rrho) + try: + minfree = min([i.free_energy for i in calculate if i is not None]) + except ValueError: + raise ValueError + without_RRHO = [] + calculate.sort(key=lambda x: int(x.id)) + for conf in calculate: + without_RRHO.append((conf.free_energy - minfree) * AU2KCAL) + for conf in calculate: + conf.free_energy = 0.0 + for conf in calculate: + rrho = "prescreening_grrho_info" + if config.solvent == "gas": + solv = None + else: + solv = "prescreening_gsolv_info" + e = "prescreening_sp_info" + 
conf.calc_free_energy(e=e, solv=solv, rrho=rrho) + try: + minfree = min([i.free_energy for i in calculate if i is not None]) + except ValueError: + raise ValueError + with_RRHO = [] + calculate.sort(key=lambda x: int(x.id)) + for conf in calculate: + with_RRHO.append((conf.free_energy - minfree) * AU2KCAL) + for conf in calculate: + conf.free_energy = 0.0 + if config.solvent != "gas": + print( + f"Spearman correlation coefficient between (E + Solv) " + f"and (E + Solv + mRRHO) = {spearman(without_RRHO, with_RRHO):.3f}" + ) + else: + print( + f"Spearman correlation coefficient between (E) " + f"and (E + mRRHO) = {spearman(without_RRHO, with_RRHO):.3f}" + ) + + # sorting + if maxreldft > config.part1_threshold: + print("\n" + "".ljust(int(PLENGTH / 2), "-")) + print("Conformers considered further".center(int(PLENGTH / 2), " ")) + print("".ljust(int(PLENGTH / 2), "-") + "\n") + for conf in list(calculate): + if conf.rel_free_energy <= config.part1_threshold: + conf.part_info["part1"] = "passed" + elif conf.rel_free_energy <= ( + config.part1_threshold + conf.prescreening_grrho_info["fuzzythr"] + ): + print(f"Considered CONF{conf.id} because of increased fuzzythr.") + conf.part_info["part1"] = "passed" + continue + else: + conf.part_info["part1"] = "refused" + store_confs.append(calculate.pop(calculate.index(conf))) + if calculate: + print( + f"These conformers are below the {config.part1_threshold+fuzzythr:.3f} " + f"kcal/mol threshold.\n" + ) + print_block(["CONF" + str(i.id) for i in calculate]) + else: + print("Error: There are no more conformers left!") + else: + for conf in list(calculate): + conf.part_info["part1"] = "passed" + print( + "\nAll relative (free) energies are below the initial threshold " + f"of {config.part1_threshold} kcal/mol.\nAll conformers are " + "considered further." 
+ ) + ensembledata.nconfs_per_part["part1"] = len(calculate) + # save current data to jsonfile + config.write_json( + config.cwd, + [i.provide_runinfo() for i in calculate] + + [i.provide_runinfo() for i in prev_calculated] + + [i.provide_runinfo() for i in store_confs] + + [ensembledata], + config.provide_runinfo(), + ) + + # free energy: + for conf in calculate: + if not config.evaluate_rrho: + rrho = None + else: + rrho = "prescreening_grrho_info" + if config.solvent == "gas": + solv = None + else: + solv = "prescreening_gsolv_info" + conf.calc_free_energy(e="prescreening_sp_info", solv=solv, rrho=rrho) + + # write coord.enso_best + for conf in calculate: + if conf.id == ensembledata.bestconf["part1"]: + # copy the lowest optimized conformer to file coord.enso_best + with open( + os.path.join("CONF" + str(conf.id), config.func, "coord"), + "r", + encoding=CODING, + newline=None, + ) as f: + coord = f.readlines() + with open( + os.path.join(config.cwd, "coord.enso_best"), "w", newline=None + ) as best: + best.write( + "$coord # {} {} !CONF{} \n".format( + conf.free_energy, + conf.prescreening_grrho_info["energy"], + conf.id, + ) + ) + for line in coord[1:]: + if "$" in line: # stop at $end ... 
+ break + best.write(line) + best.write("$end \n") + + ################################################################################ + # calculate average G correction + print( + "\nCalculating Boltzmann averaged free energy of ensemble on " + f"input geometries (not DFT optimized)!\n" + ) + # calculate Boltzmannweights + if not config.evaluate_rrho or config.solvent == "gas": + if not config.evaluate_rrho and config.solvent == "gas": + line = ( + f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} " + f"{'avG(T) /a.u.':>14} " + ) + elif not config.evaluate_rrho: + line = ( + f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} " + f"{'avGsolv(T) /a.u.':>16} {'avG(T) /a.u.':>14} " + ) + elif config.solvent == "gas": + line = ( + f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} " + f"{'avGmRRHO(T) /a.u.':>16} {'avG(T) /a.u.':>14} " + ) + else: + line = ( + f"{'temperature /K:':<15} {'avE(T) /a.u.':>14} " + f"{'avGmRRHO(T) /a.u.':>16} {'avGsolv(T) /a.u.':>16} " + f"{'avG(T) /a.u.':>14}" + ) + print(line) + print("".ljust(int(PLENGTH), "-")) + # get free energy at (T) + for conf in calculate: + if not config.evaluate_rrho: + rrho = None + else: + rrho = "prescreening_grrho_info" + if config.solvent == "gas": + solv = None + else: + solv = "prescreening_gsolv_info" + e = "prescreening_sp_info" + conf.calc_free_energy(e=e, solv=solv, rrho=rrho) + + calculate = calc_boltzmannweights(calculate, "free_energy", config.temperature) + avG = 0.0 + avE = 0.0 + avGRRHO = 0.0 + avGsolv = 0.0 + for conf in calculate: + avG += conf.bm_weight * conf.free_energy + avE += conf.bm_weight * conf.prescreening_sp_info["energy"] + avGRRHO += conf.bm_weight * conf.prescreening_grrho_info["energy"] + avGsolv += conf.bm_weight * conf.prescreening_gsolv_info["energy"] + + # printout: + if not config.evaluate_rrho or config.solvent == "gas": + if not config.evaluate_rrho and config.solvent == "gas": + line = f"{config.temperature:^15} {avE:>14.7f} {avG:>14.7f} " + elif not config.evaluate_rrho: + line = 
( + f"{config.temperature:^15} {avE:>14.7f} {avGsolv:>16.7f} " + f"{avG:>14.7f} " + ) + elif config.solvent == "gas": + line = ( + f"{config.temperature:^15} {avE:>14.7f} {avGRRHO:>16.7f} " + f"{avG:>14.7f} " + ) + else: + line = ( + f"{config.temperature:^15} {avE:>14.7f} {avGRRHO:>16.7f} " + f"{avGsolv:>16.7f} {avG:>14.7f} " + ) + print(line, " <<==part1==") + print("".ljust(int(PLENGTH), "-")) + print("") + + ################################################################################ + + print("\nCalculating unbiased GFNn-xTB energy") + instruction_gfn = { + "jobtype": "xtb_sp", + "func": getattr(config, "part1_gfnv"), + "charge": config.charge, + "unpaired": config.unpaired, + "solvent": config.solvent, + "progpath": config.external_paths["xtbpath"], + "sm": config.sm_rrho, + "rmsdbias": config.rmsdbias, + "omp": config.omp, + "temperature": config.temperature, + "gfn_version": config.part1_gfnv, + "energy": 0.0, + "energy2": 0.0, + "success": False, + } + if calculate: + folder_gfn = "GFN_unbiased" + save_errors, store_confs, calculate = new_folders( + config.cwd, calculate, folder_gfn, save_errors, store_confs + ) + # write coord to folder + calculate, store_confs, save_errors = ensemble2coord( + config, folder_gfn, calculate, store_confs, save_errors + ) + calculate = run_in_parallel( + config, + q, + resultq, + job, + config.maxthreads, + calculate, + instruction_gfn, + folder_gfn, + ) + for conf in list(calculate): + if not conf.job["success"]: + conf.xtb_energy_unbiased = conf.xtb_energy + else: + conf.xtb_energy_unbiased = conf.job["energy"] + # write ensemble + move_recursively(config.cwd, "enso_ensemble_part1.xyz") + if config.evaluate_rrho: + kwargs = {"energy": "xtb_energy_unbiased", "rrho": "prescreening_grrho_info"} + else: + kwargs = {"energy": "xtb_energy_unbiased"} + write_trj( + sorted(calculate, key=lambda x: float(x.free_energy)), + config.cwd, + "enso_ensemble_part1.xyz", + config.func, + config.nat, + "free_energy", + **kwargs, + ) + + 
# reset + for conf in calculate: + conf.free_energy = 0.0 + conf.rel_free_energy = None + conf.bm_weight = 0.0 + conf.reset_job_info() + if save_errors: + print( + "***---------------------------------------------------------***", + file=sys.stderr, + ) + print( + "Printing most relevant errors again, just for user convenience:", + file=sys.stderr, + ) + for _ in list(save_errors): + print(save_errors.pop(), file=sys.stderr) + print( + "***---------------------------------------------------------***", + file=sys.stderr, + ) + + tmp = int((PLENGTH - len("END of Part1")) / 2) + print("\n" + "".ljust(tmp, ">") + "END of Part1" + "".rjust(tmp, "<")) + return config, calculate, store_confs, ensembledata diff --git a/censo_qm/qm_job.py b/censo_qm/qm_job.py new file mode 100644 index 0000000..16cc6a3 --- /dev/null +++ b/censo_qm/qm_job.py @@ -0,0 +1,611 @@ +""" +Contains QmJob base class for calculating QM related properties of conformers. +Additionally contains functions which should be present irrespective of the QM +code. (xTB always available) +""" +import os +import math + +try: + from math import isclose +except ImportError: + from .utilities import isclose +import time +import subprocess +import json +from .cfg import ENVIRON, CODING, censo_solvent_db +from .utilities import last_folders, print +from .datastructure import MoleculeData + + +class QmJob(MoleculeData): + """ + QmJob base class for calculating QM related properties of conformers. 
+ """ + + def __init__(self, rank, *args, **kwargs): + MoleculeData.__init__(self, rank, *args, **kwargs) + self.reset_job_info() + + def reset_job_info(self): + """ + Clear information/instructions from the previous job + """ + self.job = { + "jobtype": "", + "prepinfo": [], # additional info for cefine + "method": "", # description of the method + "method2": "", # description of the method + "workdir": "", + "copymos": "", + "omp": 1, + "charge": 0, + "unpaired": 0, + "gfn_version": None, + "bhess": None, + "consider_sym": False, + "symmetry": "C1", + "rmsdbias": False, + "sm_rrho": None, + "func": None, + "func2": None, # functional used in subsequent property calculation + "basis": None, + "solvent": "gas", + "sm": "gas-phase", + "rmsd": 0.0, # rmsd in case of bhess + "nat": None, # number of atoms + "onlyread": False, # don't calculate, just perform readout + "progpath": "", + "xtb_driver_path": "", # program path to xtb if xtb as driver + # optimization related: + "optcycles": None, # number of cycles that are allowed in the + "hlow": None, # setting for ancopt + "optlevel": None, + # geometry optimization + "cycles": 0, # number of cycles it needed for optimization convergence + "ecyc": [], + "decyc": [], + "grad_norm": 10.0, + "converged": False, + # temperature related: + "trange": [], # list with temperatures to evaluate G,H,S + "temperature": 298.15, + # nmrprop related: + "h_active": False, + "c_active": False, + "f_active": False, + "p_active": False, + "si_active": False, + # optical rotation related: + "freq_or": [], + # return values which can be updated: + "success": False, + "energy": 0.0, + "energy2": 0.0, + "erange1": {}, + "erange2": {}, + "erange3": {}, + "errormessage": [], + "internal_error": [], + # + "cosmorsparam": "", # normal/fine + } + + def _sp(self, silent=False): + """ + single-point calculation + """ + pass + + def _opt(self): + """ + geometry optimization + """ + pass + + def _genericoutput(self): + """ + Read shielding and 
# NOTE(review): the functions below are methods of the xTB-capable job class
# whose header lies outside this chunk; they communicate only through the
# ``self.job`` dictionary.


def _xtb_sp(self):
    """
    Get single-point energy from xTB.

    Runs ``<progpath> coord --<gfn_version> --sp`` inside ``self.job["workdir"]``
    (with the implicit solvation flags unless the solvent is "gas"), writes the
    program output to ``sp.out`` and parses the total energy from it.

    Results are written to ``self.job``:
        energy  -- parsed total energy, 0.0 on any failure
        success -- True only if an energy could be read
    """
    workdir = self.job["workdir"]
    # remove leftovers of earlier runs so that stale output is never parsed
    files = [
        "xtbrestart",
        "xtbtopo.mol",
        "xcontrol-inp",
        "wbo",
        "charges",
        "gfnff_topo",
        "sp.out",
    ]
    for file in files:
        if os.path.isfile(os.path.join(workdir, file)):
            os.remove(os.path.join(workdir, file))
    # run single-point:
    call = [
        self.job["progpath"],
        "coord",
        "--" + self.job["gfn_version"],
        "--sp",
        "--chrg",
        str(self.job["charge"]),
        "--norestart",
    ]
    if self.job["solvent"] != "gas":
        call.extend(
            [
                "--" + str(self.job["sm"]),
                censo_solvent_db[self.job["solvent"]]["xtb"][1],
            ]
        )
    outpath = os.path.join(workdir, "sp.out")
    with open(outpath, "w", newline=None) as outputfile:
        returncode = subprocess.call(
            call,
            shell=False,
            stdin=None,
            stderr=subprocess.STDOUT,
            universal_newlines=False,
            cwd=workdir,
            stdout=outputfile,
            env=ENVIRON,
        )
    if returncode != 0:
        self.job["energy"] = 0.0
        self.job["success"] = False
        print(
            f"ERROR: {self.job['gfn_version']}-xTB error in "
            f"{last_folders(workdir, 2)}"
        )
        return
    # read energy:
    energy = None
    if os.path.isfile(outpath):
        with open(outpath, "r", encoding=CODING, newline=None) as inp:
            store = inp.readlines()
        for line in store:
            if "| TOTAL ENERGY" in line:
                try:
                    energy = float(line.split()[3])
                except (ValueError, IndexError):
                    print(
                        "Error while converting "
                        "single-point in: {}".format(last_folders(workdir, 2))
                    )
                    self.job["energy"] = 0.0
                    self.job["success"] = False
                    return
    if energy is None:
        # output file missing or without an energy line: report a failure
        # instead of silently keeping whatever was stored before
        self.job["energy"] = 0.0
        self.job["success"] = False
        print(
            f"ERROR: {self.job['gfn_version']}-xTB error in "
            f"{last_folders(workdir, 2)}"
        )
        return
    self.job["energy"] = energy
    self.job["success"] = True
    print(
        f"{self.job['gfn_version']}-xTB energy for {last_folders(workdir, 2)}"
        f" = {self.job['energy']: >.7f}"
    )


def _xtb_gsolv(self):
    """
    Calculate additive GBSA or ALPB solvation contribution by
    Gsolv = Esolv - Egas,
    using xTB and the GFNn or GFN-FF hamiltonian.

    ``self.job["jobtype"]`` selects the solvation model ("gbsa_gsolv" or
    "alpb_gsolv"); any other value is reported as a failure before xtb runs.

    Results are written to ``self.job``:
        energy2         -- Gsolv, 0.0 on any failure
        energy_xtb_gas  -- gas phase xTB energy (only on success)
        energy_xtb_solv -- solution phase xTB energy (only on success)
        success         -- True only if both energies could be read
    """
    workdir = self.job["workdir"]
    solvation_models = {"gbsa_gsolv": "gbsa", "alpb_gsolv": "alpb"}
    xtbsm = solvation_models.get(self.job["jobtype"])
    if xtbsm is None:
        # previously this surfaced as a NameError after the gas phase run
        self.job["energy2"] = 0.0
        self.job["success"] = False
        print(
            f"ERROR: unknown jobtype {self.job['jobtype']!r} for the xTB "
            f"Gsolv calculation in {workdir}"
        )
        return
    print(
        f"Running {self.job['jobtype'].upper()} calculation in "
        f"{last_folders(workdir, 3)}"
    )
    files = [
        "xtbrestart",
        "xtbtopo.mol",
        "xcontrol-inp",
        "wbo",
        "charges",
        "gfnff_topo",
        "gas.out",
        "solv.out",
    ]
    for file in files:
        if os.path.isfile(os.path.join(workdir, file)):
            os.remove(os.path.join(workdir, file))

    # run gas phase single-point:
    gaspath = os.path.join(workdir, "gas.out")
    with open(gaspath, "w", newline=None) as outputfile:
        returncode = subprocess.call(
            [
                self.job["xtb_driver_path"],
                "coord",
                "--" + self.job["gfn_version"],
                "--sp",
                "--chrg",
                str(self.job["charge"]),
                "--norestart",
            ],
            shell=False,
            stdin=None,
            stderr=subprocess.STDOUT,
            universal_newlines=False,
            cwd=workdir,
            stdout=outputfile,
            env=ENVIRON,
        )
    if returncode != 0:
        self.job["energy2"] = 0.0
        self.job["success"] = False
        print(
            f"ERROR: Gas phase {self.job['gfn_version']}-xTB error in "
            f"{last_folders(workdir, 3)}"
        )
        return
    # read gas phase energy:
    tmp_gas = None
    if os.path.isfile(gaspath):
        with open(gaspath, "r", encoding=CODING, newline=None) as inp:
            store = inp.readlines()
        for line in store:
            if "| TOTAL ENERGY" in line:
                try:
                    tmp_gas = float(line.split()[3])
                except (ValueError, IndexError):
                    print(
                        "Error while converting gas phase "
                        "single-point in: {}".format(last_folders(workdir, 3))
                    )
                    self.job["energy2"] = 0.0
                    self.job["success"] = False
                    return
    if tmp_gas is None:
        self.job["energy2"] = 0.0
        self.job["success"] = False
        print(
            f"ERROR: Gas phase {self.job['gfn_version']}-xTB error in "
            f"{last_folders(workdir, 3)}"
        )
        return

    # run single-point in solution:
    # ``reference'' corresponds to 1 bar of ideal gas and 1 mol/L of liquid
    # solution at infinite dilution,
    solvpath = os.path.join(workdir, "solv.out")
    with open(solvpath, "w", newline=None) as outputfile:
        returncode = subprocess.call(
            [
                self.job["xtb_driver_path"],
                "coord",
                "--" + self.job["gfn_version"],
                "--sp",
                "--" + xtbsm,
                censo_solvent_db[self.job["solvent"]]["xtb"][1],
                "reference",
                "--chrg",
                str(self.job["charge"]),
                "--norestart",
            ],
            shell=False,
            stdin=None,
            stderr=subprocess.STDOUT,
            universal_newlines=False,
            cwd=workdir,
            stdout=outputfile,
            env=ENVIRON,
        )
    if returncode != 0:
        self.job["energy2"] = 0.0
        self.job["success"] = False
        print(
            f"ERROR: Solution phase {self.job['gfn_version']}-xTB error in "
            f"{last_folders(workdir, 3)}"
        )
        return
    time.sleep(0.05)
    # read solv.out
    tmp_solv = None
    if os.path.isfile(solvpath):
        with open(solvpath, "r", encoding=CODING, newline=None) as inp:
            store = inp.readlines()
        for line in store:
            if "| TOTAL ENERGY" in line:
                try:
                    tmp_solv = float(line.split()[3])
                except (ValueError, IndexError):
                    print(
                        "Error while converting solution phase "
                        "single-point in: {}".format(last_folders(workdir, 3))
                    )
                    self.job["energy2"] = 0.0
                    self.job["success"] = False
                    return
    if tmp_solv is None:
        self.job["energy2"] = 0.0
        self.job["success"] = False
        print(
            f"ERROR: Solution phase {self.job['gfn_version']}-xTB error in "
            f"{last_folders(workdir, 3)}"
        )
        return

    # both energies are available: Gsolv = Esolv - Egas
    self.job["energy2"] = tmp_solv - tmp_gas
    self.job["success"] = True
    self.job["energy_xtb_gas"] = tmp_gas
    self.job["energy_xtb_solv"] = tmp_solv


def _xtbrrho(self):
    """
    mRRHO contribution with GFNn/GFN-FF-XTB.

    Unless ``self.job["onlyread"]`` is set, an ``xcontrol-inp`` file is written
    and xtb is run with ``--ohess`` (or ``--bhess`` if ``self.job["bhess"]``),
    output going to ``ohess.out``. Afterwards the results are read from
    ``ohess.out`` and ``xtb_enso.json`` in the working directory.

    Results are written to ``self.job``:
        energy   -- G(T) (ZPVE if the temperature is 0), 0.0 on failure
        erange1  -- {T: G(T)}, erange2 -- {T: H(T)}, erange3 -- {T: S_rot}
                    (filled from the thermo table if a trange is requested)
        rmsd     -- final rmsd reported by a bhess run
        symmetry -- point group from xtb_enso.json, if present
        success  -- True only if the thermodynamic data could be read
    """
    workdir = self.job["workdir"]
    outpath = os.path.join(workdir, "ohess.out")
    if not self.job["onlyread"]:
        print(
            f"Running {str(self.job['gfn_version']).upper()}-xTB mRRHO in "
            f"{last_folders(workdir, 2)}"
        )
        files = [
            "xtbrestart",
            "xtbtopo.mol",
            "xcontrol-inp",
            "wbo",
            "charges",
            "gfnff_topo",
        ]
        for file in files:
            if os.path.isfile(os.path.join(workdir, file)):
                os.remove(os.path.join(workdir, file))
        if self.job["trange"]:
            # make sure the evaluation temperature itself is part of the
            # range exactly once (drop near-duplicates, then append it)
            for t in list(self.job["trange"]):
                if isclose(self.job["temperature"], t, abs_tol=0.6):
                    self.job["trange"].pop(self.job["trange"].index(t))
            self.job["trange"].append(self.job["temperature"])
        with open(
            os.path.join(workdir, "xcontrol-inp"), "w", newline=None
        ) as xcout:
            xcout.write("$thermo\n")
            if self.job["trange"]:
                xcout.write(
                    f"    temp="
                    f"{','.join([str(i) for i in self.job['trange']])}\n"
                )
            else:
                xcout.write("    temp={}\n".format(self.job["temperature"]))
            xcout.write("    sthr=50.0\n")
            if self.job["bhess"]:
                xcout.write("    imagthr={}\n".format("-100"))
            else:
                xcout.write("    imagthr={}\n".format("-50"))
            xcout.write("$symmetry\n")
            if self.job["consider_sym"]:
                # xcout.write("    desy=0.1\n") # taken from xtb defaults
                xcout.write("    maxat=1000\n")
            else:
                xcout.write("    desy=0.0\n")
            xcout.write("$end\n")
        # set ohess or bhess
        dohess = "--bhess" if self.job["bhess"] else "--ohess"
        olevel = "vtight"
        time.sleep(0.05)
        callargs = [
            self.job["progpath"],
            "coord",
            "--" + str(self.job["gfn_version"]),
            dohess,
            olevel,
        ]
        if self.job["solvent"] != "gas":
            callargs.extend(
                [
                    "--" + str(self.job["sm_rrho"]),
                    censo_solvent_db[self.job["solvent"]]["xtb"][1],
                ]
            )
        callargs.extend(
            [
                "--chrg",
                str(self.job["charge"]),
                "--enso",
                "--norestart",
                "-I",
                "xcontrol-inp",
            ]
        )
        if self.job["rmsdbias"]:
            callargs.extend(
                [
                    "--bias-input",
                    str(os.path.join(self.job["cwd"], "rmsdpot.xyz")),
                ]
            )
        with open(outpath, "w", newline=None) as outputfile:
            returncode = subprocess.call(
                callargs,
                shell=False,
                stdin=None,
                stderr=subprocess.STDOUT,
                universal_newlines=False,
                cwd=workdir,
                stdout=outputfile,
                env=ENVIRON,
            )
        time.sleep(0.05)
        # check if converged:
        if returncode != 0:
            self.job["energy"] = 0.0
            self.job["success"] = False
            self.job["errormessage"].append(
                f"ERROR: {str(self.job['gfn_version']).upper()}-xTB ohess error in "
                f"{last_folders(workdir, 2):18}"
            )
            print(self.job["errormessage"][-1])
            return
    # start reading output!
    if self.job["trange"]:
        if not os.path.isfile(outpath):
            self.job["energy"] = 0.0
            self.job["success"] = False
            self.job["errormessage"].append(
                f"ERROR: file {self.job['workdir']}/ohess.out could not be found!"
            )
            print(self.job["errormessage"][-1])
            return
        gt = {}
        ht = {}
        rotS = {}
        with open(outpath, "r", encoding=CODING, newline=None) as inp:
            store = inp.readlines()
        # rotational entropy: the value sits on the line following a "VIB" row;
        # enumerate gives the true position even if identical lines repeat
        for idx, line in enumerate(store):
            if "VIB" in line:
                try:
                    T = float(line.split()[0])
                    rotS[T] = float(store[idx + 1].split()[4])
                except (KeyError, ValueError, IndexError):
                    pass
        # G(T) / H(T) table: starts two lines below the "T/K" header and
        # ends at the next dashed separator
        for idx, line in enumerate(store):
            if "T/K" in line:
                for row in store[idx + 2 :]:
                    if "----------------------------------" in row:
                        break
                    try:
                        T = float(row.split()[0])
                        gt[T] = float(row.split()[4])
                        ht[T] = float(row.split()[2])
                    except (ValueError, KeyError, IndexError):
                        print("ERROR: can not convert G(T)")
        if len(self.job["trange"]) == len(gt):
            self.job["success"] = True
            self.job["erange1"] = gt
            self.job["erange2"] = ht
            self.job["erange3"] = rotS
        else:
            self.job["success"] = False
            self.job["errormessage"].append(
                f"ERROR: could not read G(T) for all requested temperatures in "
                f"{last_folders(workdir, 2):18}"
            )
            print(self.job["errormessage"][-1])
            return
    # end self.trange
    if self.job["bhess"]:
        # read rmsd_info
        with open(outpath, "r", encoding=CODING, newline=None) as inp:
            store = inp.readlines()
        for line in store:
            if "final rmsd / " in line:
                try:
                    self.job["rmsd"] = float(line.split()[3])
                except (ValueError, IndexError):
                    self.job["rmsd"] = 0.0
    jsonpath = os.path.join(workdir, "xtb_enso.json")
    if os.path.isfile(jsonpath):
        try:
            with open(jsonpath, "r", encoding=CODING, newline=None) as f:
                data = json.load(f)
        except ValueError:
            # json.JSONDecodeError is a ValueError: truncated/corrupt file
            print(
                "Error while converting mRRHO in: {}".format(
                    last_folders(workdir, 2)
                )
            )
            self.job["energy"] = 0.0
            self.job["success"] = False
            return
        if "number of imags" in data:
            if data["number of imags"] > 0:
                print(
                    f"WARNING: found {data['number of imags']} significant"
                    f" imaginary frequencies in "
                    f"{last_folders(workdir, 2)}"
                )
        if "G(T)" in data:
            if float(self.job["temperature"]) == 0:
                # at 0 K only the zero point vibrational energy remains
                self.job["energy"] = data.get("ZPVE", 0.0)
                self.job["success"] = True
                self.job["erange1"][self.job["temperature"]] = data.get("ZPVE", 0.0)
                self.job["erange2"][self.job["temperature"]] = data.get("ZPVE", 0.0)
            else:
                self.job["energy"] = data.get("G(T)", 0.0)
                self.job["erange1"][self.job["temperature"]] = data.get("G(T)", 0.0)
                # self.job['erange2'][self.job['temperature']] = data.get("H(T)", 0.0)
                self.job["success"] = True
            if "point group" in data:
                self.job["symmetry"] = data["point group"]
        else:
            print(
                "Error while converting mRRHO in: {}".format(
                    last_folders(workdir, 2)
                )
            )
            self.job["energy"] = 0.0
            self.job["success"] = False
    else:
        print("WARNING: File {} doesn't exist!".format(jsonpath))
        self.job["energy"] = 0.0
        self.job["success"] = False


def execute(self):
    """
    Chooses which function to execute based on jobtype.

    This is only a stub here; the body is ``pass``.
    """
    pass
# censo_qm/refinement.py
"""
REFINEMENT == Part3
designed to yield high level free energies on dft optimized conformers.
"""
from multiprocessing import JoinableQueue as Queue
import shutil
import os
import sys
from .cfg import PLENGTH, CODING, AU2KCAL, DIGILEN
from .utilities import (
    check_for_folder,
    print_block,
    new_folders,
    last_folders,
    frange,
    calc_boltzmannweights,
    printout,
    move_recursively,
    write_trj,
    check_tasks,
    print,
)
from .parallel import run_in_parallel
from .orca_job import OrcaJob
from .tm_job import TmJob
from .datastructure import MoleculeData


def part3(config, conformers, store_confs, ensembledata):
    """
    Refinement of the ensemble, at high level DFT (possibly with implicit solvation)
    Calculate low level free energies with COSMO-RS single-point and gsolv

    The function runs in three stages:
      1. high level single-point (and, in solution, G_solv) per conformer,
      2. optional G_mRRHO with xtb (``config.evaluate_rrho``),
      3. free energy printout (part3.dat, trange.dat), Boltzmann sorting with
         ``config.part3_threshold`` and writing of the refined ensemble.
    The current state is written to the json file after each stage.

    Input:
     - config [conifg_setup object] contains all settings
     - conformers [list of molecule_data objects] each conformer is represented
     - store_confs [list] conformers which have been sorted out so far
     - ensembledata [object] ensemble information (bestconf, avGcorrection, ...)
    Return:
     -> config
     -> calculate (conformers which passed part3)
     -> store_confs
     -> ensembledata
    Exits the program (sys.exit(1)) if no conformer is left at any stage.
    """
    save_errors = []
    print("\n" + "".ljust(PLENGTH, "-"))
    print("CRE REFINEMENT - PART3".center(PLENGTH, " "))
    print("".ljust(PLENGTH, "-") + "\n")
    # print flags for part3
    info = []
    info.append(["prog3", "program for part3"])
    info.append(["part3_threshold", "Boltzmann sum threshold employed"])
    info.append(["func3", "functional for part3"])
    info.append(["basis3", "basis set for part3"])
    if config.solvent != "gas":
        info.append(["solvent", "solvent"])
        info.append(["smgsolv3", "solvent model"])
    info.append(["temperature", "temperature"])
    if config.multitemp:
        info.append(["multitemp", "evalulate at different temperatures"])
        info.append(
            [
                "printoption",
                "temperature range",
                [
                    i
                    for i in frange(
                        config.trange[0], config.trange[1], config.trange[2]
                    )
                ],
            ]
        )
    info.append(["prog_rrho", "program for mRRHO contribution"])
    if config.prog_rrho == "xtb":
        info.append(["part3_gfnv", "GFN version for mRRHO and/or GBSA_Gsolv"])
        if config.bhess:
            info.append(
                ["bhess", "Apply constraint to input geometry during mRRHO calculation"]
            )
    optionsexchange = {True: "on", False: "off"}
    for item in info:
        if item[0] == "justprint":
            print(item[1:][0])
        else:
            if item[0] == "printoption":
                option = item[2]
            else:
                option = getattr(config, item[0])
            if option is True or option is False:
                option = optionsexchange[option]
            elif isinstance(option, list):
                option = [str(i) for i in option]
                if len(str(option)) > 40:
                    # abbreviate long lists for the printout
                    length = 0
                    reduced = []
                    for i in option:
                        length += len(i) + 2
                        if length < 40:
                            reduced.append(i)
                    reduced.append("...")
                    option = reduced
                    length = 0
                option = ", ".join(option)
            print(
                "{}: {:{digits}} {}".format(
                    item[1], "", option, digits=DIGILEN - len(item[1])
                )
            )
    print("")
    # end print

    calculate = []  # has to be calculated in this run
    prev_calculated = []  # was already calculated in a previous run
    if store_confs is None:
        store_confs = []  # stores all confs which are sorted out!

    if config.solvent == "gas":
        print("\nCalculating single-point energies!")
    else:
        print(
            "\nCalculating single-point energies and solvation contribution (G_solv)!"
        )

    # setup queues
    q = Queue()
    resultq = Queue()

    # the job class has to match the program chosen for part3 (prog3), the
    # same setting which selects the program path further below
    job = None
    if config.prog3 == "tm":
        job = TmJob
    elif config.prog3 == "orca":
        job = OrcaJob

    for conf in list(conformers):
        if conf.removed:
            store_confs.append(conformers.pop(conformers.index(conf)))
            print(f"CONF{conf.id} is removed as requested by the user!")
            continue
        if (
            conf.part_info["part2"] == "passed"
            and conf.optimization_info["info"] == "calculated"
        ):
            if conf.highlevel_sp_info["info"] == "failed":
                conf = conformers.pop(conformers.index(conf))
                store_confs.append(conf)
                print(f"Calculation of CONF{conf.id} failed in the previous run!")
            elif conf.highlevel_sp_info["info"] == "not_calculated":
                # has to be calculated now
                conf = conformers.pop(conformers.index(conf))
                calculate.append(conf)
            elif conf.highlevel_sp_info["info"] == "prep-failed":
                # has to be retried now
                conf = conformers.pop(conformers.index(conf))
                calculate.append(conf)
            elif conf.highlevel_sp_info["info"] == "calculated":
                conf = conformers.pop(conformers.index(conf))
                if config.solvent != "gas":
                    # check if solvation calculation is calculated as well
                    if conf.highlevel_gsolv_info["info"] == "failed":
                        store_confs.append(conf)
                        print(
                            f"Calculation of the solvation contribution for CONF"
                            f"{conf.id} failed in the previous run!"
                        )
                    elif conf.highlevel_gsolv_info["info"] == "not_calculated":
                        calculate.append(conf)
                    elif conf.highlevel_gsolv_info["info"] == "prep-failed":
                        # retry
                        calculate.append(conf)
                    elif conf.highlevel_gsolv_info["info"] == "calculated":
                        conf.job["success"] = True
                        prev_calculated.append(conf)
                    else:
                        print("UNEXPECTED BEHAVIOUR")
                elif config.solvent == "gas":
                    conf.job["success"] = True
                    prev_calculated.append(conf)
        else:
            print(
                f"WARNING: CONF{conf.id} has not been optimized (part2)! "
                f"Removing CONF{conf.id}"
            )
            conf = conformers.pop(conformers.index(conf))
            store_confs.append(conf)
    if not calculate and not prev_calculated:
        print("ERROR: No conformers left!")
        print("Going to exit!")
        sys.exit(1)

    instruction = {
        "func": config.func3,
        "basis": getattr(config, "basis3", "def2-TZVPD"),
        "charge": config.charge,
        "unpaired": config.unpaired,
        "solvent": config.solvent,
        "sm": config.smgsolv3,
        "omp": config.omp,
        "temperature": config.temperature,
        "gfn_version": config.part3_gfnv,
        "copymos": "",
        "energy": 0.0,
        "energy2": 0.0,
        "success": False,
    }
    if config.multitemp:
        instruction["trange"] = [
            i for i in frange(config.trange[0], config.trange[1], config.trange[2])
        ]
    else:
        instruction["trange"] = []
    if config.solvent == "gas":
        instruction["jobtype"] = "sp"
        instruction["prepinfo"] = ["high"]
        instruction["method"], _ = config.get_method_name(
            "sp", func=instruction["func"], basis=instruction["basis"]
        )
        if config.prog3 == "orca":
            instruction["progpath"] = config.external_paths["orcapath"]
        folder = "gsolv"
        name = "highlevel single-point"
    else:
        if config.smgsolv3 in config.smgsolv_3:
            # additive GSolv
            # COSMORS
            if "cosmors" in config.smgsolv3 and config.smgsolv3 != "dcosmors":
                job = TmJob
                exc_fine = {"cosmors": "normal", "cosmors-fine": "fine"}
                tmp = {
                    "jobtype": "cosmors",
                    "prepinfo": ["high"],
                    "cosmorssetup": config.external_paths["cosmorssetup"],
                    "cosmorsparam": exc_fine.get(config.smgsolv3, "normal"),
                    "cosmothermversion": config.external_paths["cosmothermversion"],
                }
                instruction.update(tmp)
                instruction["method"], instruction["method2"] = config.get_method_name(
                    "cosmors",
                    func=instruction["func"],
                    basis=instruction["basis"],
                    sm=instruction["sm"],
                )
                folder = "gsolv/COSMO"
                name = "highlevel COSMO-RS"
            # GBSA-Gsolv / ALPB-Gsolv
            elif instruction["sm"] in ("gbsa_gsolv", "alpb_gsolv"):
                # do DFT gas phase sp and additive Gsolv
                instruction["jobtype"] = instruction["sm"]
                if config.prog3 == "orca":
                    instruction["progpath"] = config.external_paths["orcapath"]
                instruction["xtb_driver_path"] = config.external_paths["xtbpath"]
                instruction["method"], instruction["method2"] = config.get_method_name(
                    instruction["jobtype"],
                    func=instruction["func"],
                    basis=instruction["basis"],
                    sm=instruction["sm"],
                    gfn_version=instruction["gfn_version"],
                )
                # The instruction is shared by all conformers, hence a stored
                # gas phase energy may only be reused if exactly one conformer
                # is calculated and it is this conformer's own energy.
                reuse_sp = (
                    len(calculate) == 1
                    and calculate[0].highlevel_sp_info["info"] == "calculated"
                    and calculate[0].highlevel_sp_info["method"]
                    == instruction["method"]
                )
                if reuse_sp:
                    # do not calculate gas phase sp again!
                    instruction["energy"] = calculate[0].highlevel_sp_info["energy"]
                    instruction["prepinfo"] = []
                else:
                    instruction["prepinfo"] = ["high"]
                name = "highlevel " + str(instruction["sm"]).upper()
                folder = "gsolv"
            # SMD-Gsolv
            elif instruction["sm"] == "smd_gsolv":
                job = OrcaJob
                instruction["prepinfo"] = ["high"]
                instruction["jobtype"] = instruction["sm"]
                instruction["progpath"] = config.external_paths["orcapath"]
                instruction["method"], instruction["method2"] = config.get_method_name(
                    "smd_gsolv",
                    func=instruction["func"],
                    basis=instruction["basis"],
                    sm=instruction["sm"],
                )
                name = "highlevel " + str(instruction["sm"]).upper()
                folder = "gsolv"
        else:
            # with implicit solvation
            instruction["jobtype"] = "sp_implicit"
            instruction["prepinfo"] = ["high"]
            if config.prog3 == "orca":
                instruction["progpath"] = config.external_paths["orcapath"]
            instruction["method"], instruction["method2"] = config.get_method_name(
                "sp_implicit",
                func=instruction["func"],
                basis=instruction["basis"],
                sm=instruction["sm"],
            )
            name = "high level single-point"
            folder = "gsolv"

    if job is None:
        # neither prog3 nor the solvation model selected a job class
        print(f"ERROR: unsupported program '{config.prog3}' requested for part3!")
        print("Going to exit!")
        sys.exit(1)

    if prev_calculated:
        check_for_folder(config.cwd, [i.id for i in prev_calculated], folder)
        print("The calculation was performed before for:")
        print_block(["CONF" + str(i.id) for i in prev_calculated])
    pl = config.lenconfx + 4 + len(str("/" + folder))

    check = {True: "was successful", False: "FAILED"}
    if calculate:
        # make new folder:
        save_errors, store_confs, calculate = new_folders(
            config.cwd, calculate, folder, save_errors, store_confs
        )
        # need to copy optimized coord to folder
        for conf in calculate:
            tmp1 = os.path.join(config.cwd, "CONF" + str(conf.id), config.func, "coord")
            tmp2 = os.path.join(config.cwd, "CONF" + str(conf.id), folder, "coord")
            try:
                shutil.copy(tmp1, tmp2)
            except FileNotFoundError:
                print("ERROR can't copy optimized geometry!")
        if config.solvent == "gas":
            print("The high level single-point is now calculated for:")
        else:
            print("The high level gsolv calculation is now calculated for:")
        print_block(["CONF" + str(i.id) for i in calculate])
        # parallel calculation:
        calculate = run_in_parallel(
            config, q, resultq, job, config.maxthreads, calculate, instruction, folder
        )
        # check if too many calculations failed

        for conf in list(calculate):
            if instruction["jobtype"] in ("sp", "sp_implicit"):
                line = (
                    f"{name} calculation {check[conf.job['success']]}"
                    f" for {last_folders(conf.job['workdir'], 2):>{pl}}: "
                    f"{conf.job['energy']:>.8f}"
                )
                print(line)
                if not conf.job["success"]:
                    save_errors.append(line)
                    conf.highlevel_sp_info["info"] = "failed"
                    conf.highlevel_sp_info["method"] = instruction["method"]
                    conf.part_info["part3"] = "refused"
                    store_confs.append(calculate.pop(calculate.index(conf)))
                else:
                    conf.highlevel_sp_info["energy"] = conf.job["energy"]
                    conf.highlevel_sp_info["info"] = "calculated"
                    conf.highlevel_sp_info["method"] = instruction["method"]
            elif instruction["jobtype"] in (
                "cosmors",
                "smd_gsolv",
                "gbsa_gsolv",
                "alpb_gsolv",
            ):
                line = (
                    f"{name} calculation {check[conf.job['success']]} for "
                    f"{last_folders(conf.job['workdir'], 3):>{pl}}: "
                    f"{conf.job['energy2']:>.8f}"
                )
                print(line)
                if not conf.job["success"]:
                    save_errors.append(line)
                    conf.part_info["part3"] = "refused"
                    conf.highlevel_sp_info["info"] = "failed"
                    conf.highlevel_sp_info["method"] = instruction["method"]
                    conf.highlevel_gsolv_info["info"] = "failed"
                    conf.highlevel_gsolv_info["method"] = instruction["method2"]
                    store_confs.append(calculate.pop(calculate.index(conf)))
                else:
                    conf.highlevel_sp_info["energy"] = conf.job["energy"]
                    conf.highlevel_sp_info["info"] = "calculated"
                    conf.highlevel_sp_info["method"] = instruction["method"]
                    conf.highlevel_gsolv_info["energy"] = conf.job["energy2"]
                    conf.highlevel_gsolv_info["gas-energy"] = conf.job["energy"]
                    conf.highlevel_gsolv_info["info"] = "calculated"
                    conf.highlevel_gsolv_info["method"] = instruction["method2"]
                    conf.highlevel_gsolv_info["range"] = conf.job["erange1"]
            else:
                print(
                    f'UNEXPECTED BEHAVIOUR: {conf.job["success"]} {conf.job["jobtype"]}'
                )
        # save current data to jsonfile
        config.write_json(
            config.cwd,
            [i.provide_runinfo() for i in calculate]
            + [i.provide_runinfo() for i in prev_calculated]
            + [i.provide_runinfo() for i in store_confs]
            + [ensembledata],
            config.provide_runinfo(),
        )
        check_tasks(calculate, config.check)
    else:
        print("No conformers are considered additionally.")
    # adding conformers calculated before:
    if prev_calculated:
        for conf in list(prev_calculated):
            conf.job["workdir"] = os.path.normpath(
                os.path.join(config.cwd, "CONF" + str(conf.id), folder)
            )
            if instruction["jobtype"] in ("sp", "sp_implicit"):
                print(
                    f"Single-point calculation {check[conf.job['success']]} for "
                    f"{last_folders(conf.job['workdir'], 2):>{pl}}: "
                    f"{conf.highlevel_sp_info['energy']:>.8f}"
                )
            elif instruction["jobtype"] in (
                "cosmors",
                "smd_gsolv",
                "gbsa_gsolv",
                "alpb_gsolv",
            ):
                # label with the solvation model actually used
                print(
                    f"{name} calculation {check[conf.job['success']]} for "
                    f"{last_folders(conf.job['workdir'], 3):>{pl}}: "
                    f"{conf.highlevel_gsolv_info['energy']:>.8f}"
                )
            calculate.append(prev_calculated.pop(prev_calculated.index(conf)))
    for conf in calculate:
        conf.reset_job_info()
    if not calculate:
        print("ERROR: No conformers left!")
        print("Going to exit!")
        sys.exit(1)
    # ***************************************************************************
    if config.evaluate_rrho:
        instruction_rrho = {
            "jobtype": "rrhoxtb",
            "func": config.part3_gfnv,
            "gfn_version": config.part3_gfnv,
            "temperature": config.temperature,
            "charge": config.charge,
            "unpaired": config.unpaired,
            "solvent": config.solvent,
            "omp": config.omp,
            "bhess": config.bhess,
            "sm_rrho": config.sm_rrho,
            "rmsdbias": config.rmsdbias,
            "cwd": config.cwd,
            "consider_sym": config.consider_sym,
            "energy": 0.0,
            "energy2": 0.0,
            "success": False,
            "progpath": config.external_paths["xtbpath"],
        }
        folder_rrho = "rrho_part3"
        instruction_rrho["method"], _ = config.get_method_name(
            "rrhoxtb",
            bhess=config.bhess,
            gfn_version=instruction_rrho["gfn_version"],
            sm=instruction_rrho["sm_rrho"],
            solvent=instruction_rrho["solvent"],
        )
        if config.multitemp:
            instruction_rrho["trange"] = [
                i for i in frange(config.trange[0], config.trange[1], config.trange[2])
            ]
        else:
            instruction_rrho["trange"] = []

        # check if calculated
        for conf in list(calculate):
            if conf.removed:
                store_confs.append(calculate.pop(calculate.index(conf)))
                print(f"CONF{conf.id} is removed as requested by the user!")
                continue
            if (
                conf.part_info["part2"] == "passed"
                and conf.optimization_info["info"] == "calculated"
            ):
                if conf.highlevel_grrho_info["info"] == "calculated":
                    conf = calculate.pop(calculate.index(conf))
                    conf.job["success"] = True
                    prev_calculated.append(conf)
                elif conf.highlevel_grrho_info["info"] == "failed":
                    conf = calculate.pop(calculate.index(conf))
                    conf.part_info["part3"] = "refused"
                    store_confs.append(conf)
                    print(f"Calculation of CONF{conf.id} failed in the previous run!")
                elif conf.highlevel_grrho_info["info"] in (
                    "not_calculated",
                    "prep-failed",
                ):
                    # stay in calculate (e.g not_calculated or prep-failed)
                    # check if method has been calculated in part2
                    if instruction_rrho["method"] == conf.lowlevel_grrho_info["method"]:
                        # has been calculated before, just copy
                        conf.job["success"] = True
                        attributes = vars(MoleculeData(0)).get("highlevel_grrho_info")
                        tmp = {}
                        for key in attributes.keys():
                            if key != "prev_methods":
                                tmp[key] = getattr(conf, "lowlevel_grrho_info").get(key)
                        getattr(conf, "highlevel_grrho_info").update(tmp)
                        prev_calculated.append(calculate.pop(calculate.index(conf)))
                    elif (
                        instruction_rrho["method"]
                        in conf.lowlevel_grrho_info["prev_methods"].keys()
                    ):
                        # has been calculated before, just copy
                        conf.job["success"] = True
                        conf.load_prev(
                            "lowlevel_grrho_info",
                            instruction_rrho["method"],
                            saveto="highlevel_grrho_info",
                        )
                        prev_calculated.append(calculate.pop(calculate.index(conf)))
                else:
                    print("UNEXPECTED BEHAVIOUR")
            else:
                conf = calculate.pop(calculate.index(conf))
                store_confs.append(conf)
        if not calculate and not prev_calculated:
            print("ERROR: No conformers left!")
            print("Going to exit!")
            sys.exit(1)
        # do the rrho stuff:
        if config.solvent == "gas":
            print("\nCalculating highlevel G_mRRHO on DFT geometry!")
        else:
            print(
                "\nCalculating highlevel G_mRRHO with implicit solvation "
                "on DFT geometry!"
            )
        if prev_calculated:
            check_for_folder(config.cwd, [i.id for i in prev_calculated], folder_rrho)
            print("The G_mRRHO calculation was performed before for:")
            print_block(["CONF" + str(i.id) for i in prev_calculated])
        pl = config.lenconfx + 4 + len(str("/" + folder_rrho))

        if calculate:
            print("The G_mRRHO calculation is now performed for:")
            print_block(["CONF" + str(i.id) for i in calculate])
            # create folders:
            save_errors, store_confs, calculate = new_folders(
                config.cwd, calculate, folder_rrho, save_errors, store_confs
            )
            # copy optimized geoms to folder
            for conf in list(calculate):
                try:
                    tmp_from = os.path.join(
                        config.cwd, "CONF" + str(conf.id), config.func
                    )
                    tmp_to = os.path.join(
                        config.cwd, "CONF" + str(conf.id), folder_rrho
                    )
                    shutil.copy(
                        os.path.join(tmp_from, "coord"), os.path.join(tmp_to, "coord")
                    )
                except shutil.SameFileError:
                    pass
                except FileNotFoundError:
                    if not os.path.isfile(os.path.join(tmp_from, "coord")):
                        print(
                            "ERROR: while copying the coord file from {}! "
                            "The corresponding file does not exist.".format(tmp_from)
                        )
                    elif not os.path.isdir(tmp_to):
                        print("ERROR: Could not create folder {}!".format(tmp_to))
                    print("ERROR: Removing conformer {}!".format(conf.name))
                    conf.highlevel_grrho_info["info"] = "prep-failed"
                    store_confs.append(calculate.pop(calculate.index(conf)))
                    save_errors.append(f"CONF{conf.id} was removed, because IO failed!")
            # parallel execution:
            calculate = run_in_parallel(
                config,
                q,
                resultq,
                job,
                config.maxthreads,
                calculate,
                instruction_rrho,
                folder_rrho,
            )
            check = {True: "was successful", False: "FAILED"}
            # check if too many calculations failed

            ###
            for conf in list(calculate):
                print(
                    f"The G_mRRHO calculation @ {conf.job['symmetry']} "
                    f"{check[conf.job['success']]} for "
                    f"{last_folders(conf.job['workdir'], 2):>{pl}}: "
                    f"{conf.job['energy']:>.8f}"
                )
                if not conf.job["success"]:
                    conf.part_info["part3"] = "refused"
                    conf.highlevel_grrho_info["info"] = "failed"
                    conf.highlevel_grrho_info["method"] = instruction_rrho["method"]
                    store_confs.append(calculate.pop(calculate.index(conf)))
                else:
                    conf.sym = conf.job["symmetry"]
                    conf.highlevel_grrho_info["rmsd"] = conf.job["rmsd"]
                    conf.highlevel_grrho_info["energy"] = conf.job["energy"]
                    conf.highlevel_grrho_info["info"] = "calculated"
                    conf.highlevel_grrho_info["method"] = instruction_rrho["method"]
                    conf.highlevel_grrho_info["range"] = conf.job["erange1"]
                    conf.highlevel_hrrho_info["range"] = conf.job["erange2"]
                    conf.highlevel_hrrho_info["info"] = "calculated"
                    conf.highlevel_hrrho_info["method"] = instruction_rrho["method"]
            # save current data to jsonfile
            config.write_json(
                config.cwd,
                [i.provide_runinfo() for i in calculate]
                + [i.provide_runinfo() for i in prev_calculated]
                + [i.provide_runinfo() for i in store_confs]
                + [ensembledata],
                config.provide_runinfo(),
            )
            check_tasks(calculate, config.check)
        else:
            print("No conformers are considered additionally.")
        # adding conformers calculated before:
        if prev_calculated:
            for conf in list(prev_calculated):
                conf.job["workdir"] = os.path.normpath(
                    os.path.join(config.cwd, "CONF" + str(conf.id), folder_rrho)
                )
                print(
                    f"The G_mRRHO calculation @ {conf.sym} "
                    f"{check[conf.job['success']]} for "
                    f"{last_folders(conf.job['workdir'], 2):>{pl}}: "
                    f"{conf.highlevel_grrho_info['energy']:>.8f}"
                )
                calculate.append(prev_calculated.pop(prev_calculated.index(conf)))
        if not calculate:
            print("ERROR: No conformers left!")
            print("Going to exit!")
            sys.exit(1)
    # save current data to jsonfile
    config.write_json(
        config.cwd,
        [i.provide_runinfo() for i in calculate]
        + [i.provide_runinfo() for i in prev_calculated]
        + [i.provide_runinfo() for i in store_confs]
        + [ensembledata],
        config.provide_runinfo(),
    )
    # printout for part3 -------------------------------------------------------
    print("\n" + "".ljust(int(PLENGTH / 2), "-"))
    print("* Gibbs free energies of part3 *".center(int(PLENGTH / 2), " "))
    print("".ljust(int(PLENGTH / 2), "-") + "\n")
    columncall = [
        lambda conf: "CONF" + str(getattr(conf, "id")),
        lambda conf: getattr(conf, "xtb_energy"),
        lambda conf: getattr(conf, "rel_xtb_energy"),
        lambda conf: getattr(conf, "highlevel_sp_info")["energy"],
        lambda conf: getattr(conf, "highlevel_gsolv_info")["energy"],
        lambda conf: getattr(conf, "highlevel_grrho_info")["energy"],
        lambda conf: getattr(conf, "free_energy"),
        lambda conf: getattr(conf, "rel_free_energy"),
    ]
    columnheader = [
        "CONF#",
        "E(GFNn-xTB)",
        "ΔE(GFNn-xTB)",
        "E [Eh]",
        "Gsolv [Eh]",
        "GmRRHO [Eh]",
        "Gtot",
        "ΔGtot",
    ]
    columndescription = ["", "[a.u.]", "[kcal/mol]", "", "", "", "[Eh]", "[kcal/mol]"]
    columndescription2 = ["", "", "", "", "", "", "", ""]
    columnformat = ["", (12, 7), (5, 2), (12, 7), (12, 7), (12, 7), (12, 7), (5, 3)]
    if config.solvent == "gas":
        columndescription[3] = instruction["method"]
    elif config.solvent != "gas":
        columndescription[3] = instruction["method"]
        columndescription[4] = instruction["method2"]
    if config.evaluate_rrho:
        columndescription[5] = str(instruction_rrho["method"]).upper()  # Grrho
    if not config.evaluate_rrho or config.solvent == "gas":
        # drop the unused columns; index 5 first so that index 4 stays valid
        if not config.evaluate_rrho:
            # ignore rrho in printout
            columncall.pop(5)
            columnheader.pop(5)
            columndescription.pop(5)
            columndescription2.pop(5)
            columnformat.pop(5)
        if config.solvent == "gas":
            columncall.pop(4)
            columnheader.pop(4)
            columndescription.pop(4)
            columndescription2.pop(4)
            columnformat.pop(4)

    # contributions which enter the free energy
    rrho = "highlevel_grrho_info" if config.evaluate_rrho else None
    solv = None if config.solvent == "gas" else "highlevel_gsolv_info"
    e = "highlevel_sp_info"
    for conf in calculate:
        conf.calc_free_energy(e=e, solv=solv, rrho=rrho)

    calculate = calc_boltzmannweights(calculate, "free_energy", config.temperature)
    minfree = min([i.free_energy for i in calculate if i is not None])
    for conf in calculate:
        conf.rel_free_energy = (conf.free_energy - minfree) * AU2KCAL
    calculate.sort(key=lambda x: int(x.id))
    printout(
        os.path.join(config.cwd, "part3.dat"),
        columncall,
        columnheader,
        columndescription,
        columnformat,
        calculate,
        minfree,
        columndescription2=columndescription2,
    )
    # end printout for part3

    for conf in calculate:
        if conf.free_energy == minfree:
            ensembledata.bestconf["part3"] = conf.id
    # -----------------------------Trange Ouput----------------------------------
    if config.multitemp:
        trange = [
            t for t in frange(config.trange[0], config.trange[1], config.trange[2])
        ]
    else:
        trange = [config.temperature]
    for conf in calculate:
        if conf.free_energy == minfree:
            # writeout of temperaturereante --> trange.dat
            try:
                l1 = max([len(str(i)) for i in trange])
                l2 = max([len(str(i - 273.15)) for i in trange])
                l3 = 12
                l4 = 12
                l5 = 14
                l6 = 16
                l7 = 12
                with open(
                    os.path.join(config.cwd, "trange.dat"), "w", newline=None
                ) as out:
                    print(
                        f"\nTemperature range for lowest lying conformer: CONF{conf.id}"
                    )
                    if getattr(ensembledata, "comment")[0] != conf.id:
                        # ensemble correction has been calculated for confx
                        print(
                            f"The avGcorrection was calculated in part2 for "
                            f"CONF{getattr(ensembledata, 'comment')[0]}"
                        )
                    line = f"\n{'T/K':>{l1}} {'T/°C':>{l2}} "
                    if config.solvent != "gas":
                        line = line + f"{'δGsolv/au':>{l3}} "
                    if config.evaluate_rrho:
                        line = line + f"{'GmRRHO/au':>{l4}} "
                    line = line + (
                        f"{'E/au':>{l5}} "
                        f"{'avGcorrection/au':>{l6}} {'Gtot/au':>{l7}}"
                    )
                    print(line)
                    out.write(line + "\n")
                    line = "".ljust(int(PLENGTH), "-")
                    print(line)
                    out.write(line + "\n")
                    for t in trange:
                        tmp = 0.0
                        line = f"{t:{l1}.2f} {t-273.15:>{l2}.1f} "
                        if config.solvent != "gas":
                            tmp += conf.highlevel_gsolv_info["range"].get(t, 0.0)
                            line = (
                                line
                                + f"{conf.highlevel_gsolv_info['range'].get(t, 0.0):>{l3}.7f} "
                            )
                        if config.evaluate_rrho:
                            tmp += conf.highlevel_grrho_info["range"].get(t, 0.0)
                            line = (
                                line
                                + f"{conf.highlevel_grrho_info['range'].get(t, 0.0):>{l4}.7f} "
                            )
                        line = line + (
                            f"{conf.highlevel_sp_info['energy']:>{l5}.7f} "
                            f"{ensembledata.avGcorrection['avGcorrection'].get(t, 0.0):>{l6}.7f} "
                        )
                        tmp += conf.highlevel_sp_info["energy"]
                        tmp += ensembledata.avGcorrection["avGcorrection"].get(t, 0.0)
                        line = line + f" {tmp:>{l7}.7f}"
                        print(line)
                        out.write(line + "\n")
                    print("".ljust(int(PLENGTH), "-"))
                    print("")
            except (ValueError, KeyError) as err:
                print(f"ERROR: {err}")
    # -----------------------------Trange Ouput END------------------------------
    # reset boltzmannweights to correct temperature
    # get free energy at (T)
    for conf in calculate:
        conf.calc_free_energy(e=e, solv=solv, rrho=rrho)
    calculate = calc_boltzmannweights(calculate, "free_energy", config.temperature)
    # SORTING for the next part:
    print("\n" + "".ljust(int(PLENGTH / 2), "-"))
    print("Conformers considered further".center(int(PLENGTH / 2), " "))
    print("".ljust(int(PLENGTH / 2), "-") + "\n")
    # evaluate conformer consideration based on Boltzmann-population
    calculate.sort(reverse=True, key=lambda x: float(x.bm_weight))
    sumup = 0.0
    for conf in list(calculate):
        sumup += conf.bm_weight
        if sumup >= (config.part3_threshold / 100):
            # threshold reached: keep only conformers which are still
            # populated more than the remaining (1 - threshold) fraction
            if conf.bm_weight < (1 - (config.part3_threshold / 100)):
                mol = calculate.pop(calculate.index(conf))
                mol.part_info["part3"] = "refused"
                store_confs.append(mol)
            else:
                conf.part_info["part3"] = "passed"
        else:
            conf.part_info["part3"] = "passed"

    ensembledata.nconfs_per_part["part3"] = len(calculate)

    if calculate:
        print(
            f"\nConformers that are below the Boltzmann-thr of {config.part3_threshold}%:"
        )
        print_block(["CONF" + str(i.id) for i in calculate])

    # save current data to jsonfile
    config.write_json(
        config.cwd,
        [i.provide_runinfo() for i in calculate]
        + [i.provide_runinfo() for i in prev_calculated]
        + [i.provide_runinfo() for i in store_confs]
        + [ensembledata],
        config.provide_runinfo(),
    )

    # write ensemble
    move_recursively(config.cwd, "enso_ensemble_part3.xyz")
    kwargs = {"energy": "xtb_energy", "rrho": "highlevel_grrho_info"}
    write_trj(
        sorted(calculate, key=lambda x: float(x.free_energy)),
        config.cwd,
        "enso_ensemble_part3.xyz",
        config.func,
        config.nat,
        "free_energy",
        **kwargs,
    )

    # write coord.enso_best
    for conf in calculate:
        if conf.id == ensembledata.bestconf["part3"]:
            # copy the lowest optimized conformer to file coord.enso_best
            with open(
                os.path.join(config.cwd, "CONF" + str(conf.id), config.func, "coord"),
                "r",
                encoding=CODING,
                newline=None,
            ) as f:
                coord = f.readlines()
            with open(
                os.path.join(config.cwd, "coord.enso_best"), "w", newline=None
            ) as best:
                best.write(
                    "$coord  # {}   {}   !CONF{} \n".format(
                        conf.free_energy, conf.highlevel_grrho_info["energy"], conf.id
                    )
                )
                for line in coord[1:]:
                    if "$" in line:  # stop at $end ...
                        break
                    best.write(line)
                best.write("$end \n")

    # reset
    for conf in calculate:
        conf.free_energy = 0.0
        conf.rel_free_energy = 0.0
        conf.bm_weight = 0.0
        conf.reset_job_info()

    if save_errors:
        print(
            "***---------------------------------------------------------***",
            file=sys.stderr,
        )
        print(
            "Printing most relevant errors again, just for user convenience:",
            file=sys.stderr,
        )
        while save_errors:
            print(save_errors.pop(), file=sys.stderr)
        print(
            "***---------------------------------------------------------***",
            file=sys.stderr,
        )
    tmp = int((PLENGTH - len("END of Part3")) / 2)
    print("\n" + "".ljust(tmp, ">") + "END of Part3" + "".rjust(tmp, "<"))
    return config, calculate, store_confs, ensembledata
# Restart-changeable settings mapped to the conformer attribute they invalidate.
_GFNV_TO_RRHO_INFO = {
    "part1_gfnv": "prescreening_grrho_info",
    "part2_gfnv": "lowlevel_grrho_info",
    "part3_gfnv": "highlevel_grrho_info",
}
_SM_TO_GSOLV_INFO = {
    "smgsolv1": "prescreening_gsolv_info",
    "smgsolv2": "lowlevel_gsolv_info",
    "smgsolv3": "highlevel_gsolv_info",
}
_SM_TO_SP_INFO = {
    "smgsolv1": "prescreening_sp_info",
    "smgsolv2": "lowlevel_sp_info",
    "smgsolv3": "highlevel_sp_info",
}
_SM_TO_GFNV = {
    "smgsolv1": "part1_gfnv",
    "smgsolv2": "part2_gfnv",
    "smgsolv3": "part3_gfnv",
}
_OPTROT_SETTINGS = ("func_or", "basis_or", "freq_or", "func_or_scf")

# Conformer fields restored from enso.json without a fallback (missing -> None).
_FIELDS_WITHOUT_DEFAULT = (
    "chrg",
    "uhf",
    "xtb_energy",
    "xtb_energy_unbiased",
    "xtb_free_energy",
    "rel_xtb_energy",
    "rel_xtb_free_energy",
    "sym",
    "gi",
)
# Conformer fields that fall back to the value of a fresh MoleculeData(0).
_FIELDS_WITH_DEFAULT = (
    "removed",
    "temperature_info",
    "cheap_prescreening_sp_info",
    "cheap_prescreening_gsolv_info",
    "prescreening_sp_info",
    "lowlevel_sp_info",
    "highlevel_sp_info",
    "prescreening_grrho_info",
    "lowlevel_grrho_info",
    "lowlevel_hrrho_info",
    "highlevel_grrho_info",
    "highlevel_hrrho_info",
    "prescreening_gsolv_info",
    "lowlevel_gsolv_info",
    "lowlevel_gsolv_compare_info",
    "highlevel_gsolv_info",
    "optimization_info",
    "nmr_coupling_info",
    "nmr_shielding_info",
    "part_info",
    "comment",
    "optical_rotation_info",
)


def _expand_trange(trange):
    """Return the list of temperatures described by a (start, end, step) triple."""
    return list(frange(trange[0], trange[1], trange[2]))


def _restore_conformer(stored):
    """Rebuild one QmJob from its enso.json entry, warning about missing fields."""
    # A fresh MoleculeData per conformer so default containers are not shared
    # between the conformers that are restored in the same run.
    defaults = MoleculeData(0)
    for info in vars(defaults):
        if info not in stored:
            print(
                f"WARNING: Missing data {info} from enso.json! " "Default is added."
            )
    kwargs = {name: stored.get(name) for name in _FIELDS_WITHOUT_DEFAULT}
    for name in _FIELDS_WITH_DEFAULT:
        kwargs[name] = stored.get(name, getattr(defaults, name))
    return QmJob(stored.get("id"), **kwargs)


def _apply_restart_changes(molecule, config, previousrun):
    """Reset or swap the stored data of *molecule* for every setting flagged as changed."""
    for key, changed in config.restart_changeable.items():
        if not changed:
            continue
        if key in ("multitemp", "trange"):
            molecule.reset_range_info(trange=_expand_trange(config.trange))
        elif key in _GFNV_TO_RRHO_INFO:
            if getattr(config, key) != getattr(previousrun, key):
                attr = _GFNV_TO_RRHO_INFO[key]
                # save calculated to
                molecule.save_prev(attr, getattr(molecule, attr).get("method"))
                # load new if available
                method, _ = config.get_method_name(
                    "rrhoxtb", gfn_version=getattr(config, key), bhess=config.bhess
                )
                molecule.load_prev(attr, method)
        elif key in _SM_TO_GSOLV_INFO:
            if getattr(config, key) != getattr(previousrun, key):
                gsolv_attr = _SM_TO_GSOLV_INFO[key]
                sp_attr = _SM_TO_SP_INFO[key]
                sm = getattr(config, key)
                # save additive gsolv calculated to
                molecule.save_prev(
                    gsolv_attr, getattr(molecule, gsolv_attr).get("method")
                )
                # Gsolv for implicit solvation included in E
                # save energy calculated to
                molecule.save_prev(sp_attr, getattr(molecule, sp_attr).get("method"))
                # load new if available
                # method naming -->
                if key == "smgsolv3":
                    func = config.func3
                    basis = config.basis3
                else:
                    func = config.func
                    basis = config.basis
                if sm == "smd_gsolv":
                    e_method, gsolv_method = config.get_method_name(
                        "smd_gsolv", func=func, basis=basis, sm=sm
                    )
                elif sm in ("cosmors", "cosmors-fine"):
                    e_method, gsolv_method = config.get_method_name(
                        "cosmors", sm=sm, func=func, basis=basis
                    )
                elif sm in ("alpb_gsolv", "gbsa_gsolv"):
                    e_method, gsolv_method = config.get_method_name(
                        sm,
                        sm=sm,
                        func=func,
                        basis=basis,
                        gfn_version=getattr(config, _SM_TO_GFNV[key]),
                    )
                elif sm in ("cpcm", "cosmo", "smd", "dcosmors"):
                    # Gsolv for implicit solvation included in E
                    # need to reset gsolv (--> gsolv_method2 has to be nonsense)
                    e_method, gsolv_method = config.get_method_name(
                        "sp_implicit", func=func, basis=basis, sm=sm
                    )
                else:
                    print("UNEXPECTED")
                    e_method = ""
                    gsolv_method = ""
                molecule.load_prev(sp_attr, e_method)
                molecule.load_prev(gsolv_attr, gsolv_method)
        elif key in _OPTROT_SETTINGS:
            # save calculated to
            molecule.save_prev(
                "optical_rotation_info",
                getattr(molecule, "optical_rotation_info").get("method"),
            )
            # load new if available
            method, _ = config.get_method_name(
                "opt-rot",
                prog=config.prog,
                basis=config.basis_or,
                func=config.func_or,
                func2=config.func_or_scf,
            )
            molecule.load_prev("optical_rotation_info", method)


def enso_startup(cwd, args):
    """
    Initialise a run: settings, input ensemble and restart data.

    1) read cml input,
    2) print header
    3) read or create enso control file '.censorc'
    4) read or write the user solvent database (~/.censo_assets)
    5) check for the input ensemble (default: crest_conformers.xyz)
    6) check program settings
    7) read or write enso.json

    Args:
        cwd: working directory of the run (made absolute before use).
        args: parsed command line namespace; unset options may be filled in
            from a previous run when ``args.restart`` is set.

    Returns:
        Tuple ``(args, config, conformers, ensembledata)`` where conformers is
        the list of QmJob objects sorted by their id.

    Raises:
        SystemExit: on cleanup/writeconfig/checkinput requests (status 0) and
            on every unrecoverable input error (status 1).
    """

    print(DESCR)
    config = config_setup(path=os.path.abspath(cwd))

    if args.cleanup:
        print("Cleaning up the directory from unneeded files!")
        config.cleanup_run()
        print("Removed files and going to exit!")
        sys.exit(0)
    elif args.cleanup_all:
        print("Cleaning up the directory from ALL unneeded files!")
        config.cleanup_run(True)
        print("Removed files and going to exit!")
        sys.exit(0)

    if args.writeconfig:
        newconfigfname = "censorc_new"
        print(
            "A new ensorc was written into the current directory file: "
            f"{newconfigfname}!\nYou have to adjust the settings to your needs"
            " and it is mandatory to correctly set the program paths!\n"
            "Additionally move the file to the correct filename: '.censorc'\n"
            "and place it either in your /home/$USER/ or current directory.\n"
            "All done!"
        )
        config.write_rcfile(os.path.join(config.cwd, newconfigfname))
        sys.exit(0)

    configfname = ".censorc"
    local_rc = os.path.join(config.cwd, configfname)
    home_rc = os.path.join(os.path.expanduser("~"), configfname)
    if os.path.isfile(local_rc):
        # local configuration file before remote configuration file
        config.configpath = local_rc
    elif os.path.isfile(home_rc):
        # remote configuration file
        config.configpath = home_rc
    else:
        print(
            f"ERROR: Could not find the config file: {configfname}.\n"
            f"{'':{7}}The file has to be either in /home/$USER/ or the current "
            "working directory!\nGoing to exit!"
        )
        sys.exit(1)

    ### solvent database adjustable by user
    censo_assets_path = os.path.expanduser("~/.censo_assets")
    if not os.path.isdir(censo_assets_path):
        mkdir_p(censo_assets_path)
    solvent_user_path = os.path.expanduser(
        os.path.join("~/.censo_assets/", "censo_solvents.json")
    )
    if os.path.isfile(solvent_user_path):
        config.save_infos.append(
            "Reading file: {}\n".format(os.path.basename(solvent_user_path))
        )
        try:
            with open(solvent_user_path, "r", encoding=CODING, newline=None) as inp:
                censo_solvent_db.update(json.load(inp, object_pairs_hook=OrderedDict))
        except (ValueError, TypeError, FileNotFoundError):
            print(
                f"Your censo_solvents.json file in {solvent_user_path} is corrupted!\n"
            )
            raise
    else:
        # written with the same encoding it is read back with above
        with open(solvent_user_path, "w", encoding=CODING) as out:
            json.dump(censo_solvent_db, out, indent=4, sort_keys=True)

    jsonfile = os.path.join(config.cwd, "enso.json")
    if args.restart and os.path.isfile(jsonfile):
        tmp = config.read_json(jsonfile, silent=True)
        # an enso.json without "settings" must not crash the option merge
        previous_settings = tmp.get("settings") or {}
        known_args = vars(args)
        for key, value in previous_settings.items():
            # only fill in options that exist and were not given on the cml
            if key in known_args and known_args[key] is None:
                setattr(args, key, value)

    if config.configpath:
        # combine args und comandline
        # check if startread in file:
        startread = "$CRE SORTING SETTINGS:"
        with open(config.configpath, "r", encoding=CODING) as myfile:
            data = myfile.readlines()
        try:
            censorc_version = "0.0.0"
            for line in data:
                if "$VERSION" in line:
                    censorc_version = line.split(":")[1]
            outdated = int(censorc_version.split(".")[0]) < int(
                __version__.split(".")[0]
            )
        except (ValueError, KeyError, AttributeError, IndexError) as e:
            # IndexError: "$VERSION" line without a ':' separator
            print(e)
            print("ERROR: Please create a new .censorc --> 'censo -newconfig'")
            sys.exit(1)
        if outdated:
            print(
                "ERROR: There has been an API break and you have to "
                "create a new .censorc.\n E.g. 'censo -newconfig'"
            )
            sys.exit(1)
        if startread not in "".join(data):
            print("ERROR: You are using a corrupted .censorc. Create a new one!")
            sys.exit(1)
        config.read_config(config.configpath, startread, args)

    # read inputfile:
    if os.path.isfile(jsonfile):
        tmp = config.read_json(jsonfile, silent=True)
        if "ensemble_info" in tmp and args.inp is None:
            previous_input = tmp["ensemble_info"].get("filename")
            if previous_input:
                inpfile = os.path.basename(previous_input)
                if os.path.isfile(inpfile):
                    args.inp = inpfile
                if args.debug:
                    print(f"Using Input file from: {inpfile}")
    if args.inp is None:
        args.inp = "crest_conformers.xyz"
    if not os.path.isfile(args.inp):
        print("ERROR: The input file can not be found!")
        sys.exit(1)
    config.ensemblepath = args.inp
    # identify coord or xyz trajectory
    config.md5 = do_md5(config.ensemblepath)
    with open(config.ensemblepath, "r", encoding=CODING, newline=None) as inp:
        foundcoord = any("$coord" in line for line in inp)
    if foundcoord:
        _, config.nat = t2x(
            config.ensemblepath, writexyz=True, outfile="converted.xyz"
        )
        config.ensemblepath = os.path.join(config.cwd, "converted.xyz")
        config.maxconf = 1
        config.nconf = 1
    else:
        try:
            with open(
                config.ensemblepath, "r", encoding=CODING, newline=None
            ) as infile:
                # first line of an xyz block holds the number of atoms
                config.nat = int(infile.readline().strip().split()[0])
                filelen = 1 + sum(1 for _ in infile)
            if config.nat < 1 or filelen % (config.nat + 2) != 0:
                raise ValueError
            config.maxconf = filelen // (config.nat + 2)
        except (ValueError, IndexError, TypeError):
            # IndexError: empty first line / empty file
            print(
                "ERROR: Could not get the number of atoms or the "
                "number of conformers from the inputfile "
                f"{os.path.basename(args.inp)}"
            )
            sys.exit(1)

    # determine number of conformers:
    if args.nconf:
        config.nconf = min(args.nconf, config.maxconf)
    else:
        config.nconf = config.maxconf

    # check settings-combination and show error:
    error_logical = config.check_logic()

    # printing parameters
    config.print_parameters()
    config.read_program_paths(config.configpath)
    requirements = config.needed_external_programs(config)
    error_logical = config.processQMpaths(requirements, error_logical)

    if error_logical and not args.debug and args.checkinput:
        print(
            "\nERROR: ENSO can not continue due to input errors!\n"
            "       Fix errors and run enso -checkinput again!"
            "\nGoing to exit!"
        )
        sys.exit(1)
    elif error_logical and not args.debug:
        print("\nERROR: ENSO can not continue due to input errors!" "\nGoing to exit!")
        sys.exit(1)

    conformers = []
    ensembledata = None
    if not error_logical or args.debug:
        print("\n" + "".ljust(PLENGTH, "-"))
        print(" Processing data from previous run (enso.json)".center(PLENGTH, " "))
        print("".ljust(PLENGTH, "-") + "\n")
        # read enso.json
        if os.path.isfile(jsonfile):
            config.jsonpath = jsonfile
            save_data = config.read_json(config.jsonpath)
            # Check if settings and "ensemble_info" are present else end!
            if "settings" not in save_data or "ensemble_info" not in save_data:
                print(
                    f"ERROR: important information for restarting missing from "
                    f"{config.jsonpath}!"
                )
                print("Going to exit!")
                sys.exit(1)
            previousrun = config_setup(internal_settings)
            for item, stored_value in save_data["settings"].items():
                setattr(previousrun, item, stored_value)
            if config.md5 != previousrun.md5:
                print(
                    "WARNING: The inputfile containing all conformers was "
                    "changed, compared to the previous run!"
                )
            for flag in config.restart_unchangeable:
                if getattr(config, flag, "None") != getattr(previousrun, flag, "None2"):
                    # report previous -> current, like the WARNING further below
                    print(
                        f"ERROR: setting {flag} was changed from "
                        f"{getattr(previousrun, flag, 'None')} to "
                        f"{getattr(config, flag, 'None')}!"
                    )
                    error_logical = True
            if (
                getattr(config, "evaluate_rrho", "None")
                != getattr(previousrun, "evaluate_rrho", "None2")
            ) and getattr(config, "part2", "None"):
                print(
                    f"ERROR: setting {'evaluate_rrho'} can not be changed "
                    f"in geometry optimization!\n"
                )
                error_logical = True
            if error_logical and not args.debug:
                print(
                    "ERROR: All flags which are concerned with geometry "
                    f"optimization \n{'':{7}}(func, prog, ancopt, sm, solv, chrg, "
                    "unpaired) are not allowed to be changed!\n"
                    f"{'':{7}}If you want to change these settings, "
                    "start from scratch in a new folder!"
                )
                print("Going to exit!")
                sys.exit(1)
            if not args.checkinput:
                move_recursively(config.cwd, os.path.basename(config.jsonpath))

            # Check if flags have been changed between two runs and adjust data
            # e.g. reset or load previously calculated
            for flag in config.restart_changeable.keys():
                if getattr(config, flag, "None") != getattr(previousrun, flag, "None2"):
                    print(
                        f"WARNING: setting {flag} was changed from "
                        f"{getattr(previousrun, flag, 'None')} to "
                        f"{getattr(config, flag, 'None')}!"
                    )
                    config.restart_changeable[flag] = True
                if (
                    flag == "multitemp"
                    and getattr(config, flag, "None")
                    and not getattr(previousrun, flag, "None")
                ):
                    # multitemp only reset if not calculated before!
                    # --> off --> on
                    print(
                        f"WARNING: {flag} is requested and the different "
                        "temperatures have not been evaluated in the\n"
                        f"{'':9}previous run! Resetting calculations concerning trange!"
                    )
                elif (
                    flag == "multitemp"
                    and not getattr(config, flag, "None")
                    and getattr(previousrun, flag, "None")
                ):
                    # on --> off: data of the previous run stays valid
                    config.restart_changeable[flag] = False
                if flag == "trange":
                    # if temp in trange has not been calculated reset!
                    prev_trange = _expand_trange(getattr(previousrun, flag))
                    current_trange = _expand_trange(getattr(config, flag))
                    if current_trange:
                        config.restart_changeable[flag] = any(
                            temp not in prev_trange for temp in current_trange
                        )

            for conf, stored in save_data.items():
                if conf == "ensemble_info":
                    ensembledata = EnsembleData(
                        id=stored.get("id"),
                        filename=stored.get("filename"),
                        part_info=stored.get("part_info"),
                        avGcorrection=stored.get("avGcorrection"),
                        comment=stored.get("comment"),
                        bestconf=stored.get("bestconf"),
                        nconfs_per_part=stored.get("nconfs_per_part"),
                    )
                    ensembledata.nconfs_per_part["starting"] = config.nconf
                elif conf != "settings":
                    molecule = _restore_conformer(stored)
                    # adjust to restart changeable data:
                    _apply_restart_changes(molecule, config, previousrun)
                    # finally add molecule to list
                    conformers.append(molecule)
            # if nconf is increased add new conformers!
            known_ids = {conf.id for conf in conformers}
            newconfs = []
            for i in range(1, config.nconf + 1):
                if i not in known_ids:
                    print(f"Adding CONF{i} as new conformer!")
                    newconfs.append(QmJob(i))
            if newconfs:
                conformers.extend(newconfs)
                get_energy_from_ensemble(config.ensemblepath, config, conformers)
        elif not args.checkinput:
            # don't create enso.json on checkinput
            # enso.json does not exist, create new conformers
            print("No restart information exists and is created during this run!\n")
            conformers = [QmJob(i) for i in range(1, config.nconf + 1)]
            # read energy from input_file and calculate rel_energy
            get_energy_from_ensemble(config.ensemblepath, config, conformers)
            ensembledata = EnsembleData()
            ensembledata.filename = args.inp
            ensembledata.nconfs_per_part["starting"] = config.nconf
            config.write_json(
                config.cwd,
                [i.provide_runinfo() for i in conformers] + [ensembledata],
                config.provide_runinfo(),
            )

    if (args.checkinput and not error_logical) or (args.debug and args.checkinput):
        print("\nInput check is finished. The ENSO program can be executed.\n")
        sys.exit(0)
    if not conformers:
        print("Error: No conformers are considered!\nGoing to exit!")
        sys.exit(1)
    # formatting information:
    config.lenconfx = max(len(str(i.id)) for i in conformers)
    conformers.sort(key=lambda x: int(x.id))
    return args, config, conformers, ensembledata
+ """ + if self.job["basis"] == "def2-QZVP(-gf)": + self.job["basis"] = "def2-QZVP" + removegf = True + else: + removegf = False + + # build cefine call: + minimal_call = [ + external_paths["cefinepath"], + "-func", + str(self.job["func"]), + "-bas", + str(self.job["basis"]), + "-sym", + "c1", + "-noopt", + ] + extension = { + "clear": [], + "low": ["-grid", "m3", "-scfconv", "6"], + "low+": ["-grid", "m4", "-scfconv", "6"], + "high": ["-grid", "m4", "-scfconv", "7"], + "high+": ["-grid", "m5", "-scfconv", "7"], + } + # additional = ["-fpol", "-novdw"] + + dogcp = False # uses gcp with basis and is added to controlappend + call = minimal_call + if self.job["prepinfo"]: + if isinstance(self.job["prepinfo"], list): + if self.job["prepinfo"][0] in extension.keys(): + call.extend(extension[self.job["prepinfo"][0]]) + if "DOGCP" in self.job["prepinfo"]: + _ = self.job["prepinfo"].pop(self.job["prepinfo"].index("DOGCP")) + dogcp = True + if len(self.job["prepinfo"]) > 1: + call.extend(self.job["prepinfo"][1:]) + else: + call.extend(extension["low"]) + else: + call.extend(extension["low"]) + + # kt2: + if self.job["func"] in ("kt2", "kt1"): + # used only for shielding or coupling calcs! 
+ call.extend(["-novdw"]) + + # r2scan-3c hack + if self.job["func"] == "r2scan-3c": + if "m3" in call: + call[call.index("m3")] = "m4" + # settings which request no dispersion: + if "-novdw" in call: + requestnovdw = True + # print("FOUND NOVDW") + else: + requestnovdw = False + + # update unpaired electrons + if int(self.job["unpaired"]) > 0: + call = call + ["-uhf", str(self.job["unpaired"])] + # update charge: + if int(self.job["charge"]) != 0: + call = call + ["-chrg", str(self.job["charge"])] + # remove -fg functions from def2-QZVP basis set + if removegf: + call = call + ["-gf"] + # call cefine: + for _ in range(2): + # print(call) + tmp = subprocess.check_output( + call, + shell=False, + stdin=None, + stderr=subprocess.STDOUT, + universal_newlines=False, + cwd=self.job["workdir"], + ) + time.sleep(0.08) + output = tmp.decode("utf-8").splitlines() + for line in output: + if "define ended abnormally" in line: + self.job["success"] = False + return + elif "define_huge" in line: + print("ERROR: define_huge: not found!") + self.job["success"] = False + return + # check if wrong functional was written by cefine + with open( + os.path.join(self.job["workdir"], "control"), + "r", + encoding=CODING, + newline=None, + ) as control: + checkup = control.readlines() + for line in checkup: + if "functional" in line: + testfunc = [self.job["func"]] + if self.job["func"] in ("b973c", "b97-3c"): + testfunc.extend(["b973c", "b97-3c"]) + if not any(func in line for func in testfunc): + print( + "Wrong functional in control file" + " in {}".format(last_folders(self.job["workdir"], 2)) + ) + self.job["success"] = False + self.job["internal_error"].append("prep-failed") + else: + self.job["success"] = True + break + if not self.job["success"]: + return + + if self.job["func"] in ("kt2", "kt1"): + # update functional to kt2 + with open( + os.path.join(self.job["workdir"], "control"), "r", newline=None + ) as inp: + tmp = inp.readlines() + with open( + 
os.path.join(self.job["workdir"], "control"), "w", newline=None + ) as out: + for line in tmp: + if "functional" in line: + out.write(" functional xcfun set-gga \n") + if self.job["func"] == "kt2": + out.write(" functional xcfun kt2 1.0 \n") + elif self.job["func"] == "kt1": + out.write(" functional xcfun kt1 1.0 \n") + else: + out.write(line + "\n") + time.sleep(0.02) + # modify the control file + # solvent_dcosmors = { + # "acetone": [" epsilon= 20.7", "$dcosmo_rs file=propanone_25.pot"], + # "chcl3": [" epsilon= 4.8", "$dcosmo_rs file=chcl3_25.pot"], + # "acetonitrile": [" epsilon= 36.6", "$dcosmo_rs file=acetonitrile_25.pot"], + # "ch2cl2": [" epsilon= 9.1", "$dcosmo_rs file=chcl3_25.pot"], + # "dmso": [" epsilon= 47.2", "$dcosmo_rs file=dimethylsulfoxide_25.pot"], + # "h2o": [" epsilon= 80.1", "$dcosmo_rs file=h2o_25.pot"], + # "methanol": [" epsilon= 32.7", "$dcosmo_rs file=methanol_25.pot"], + # "thf": [" epsilon= 7.6", "$dcosmo_rs file=thf_25.pot"], + # "toluene": [" epsilon= 2.4", "$dcosmo_rs file=toluene_25.pot"], + # "octanol": [" epsilon= 9.86", "$dcosmo_rs file=octanol_25.pot"], + # "woctanol": [" epsilon= 8.1", "$dcosmo_rs file=wet-octanol_25.pot"], + # "hexadecane": [" epsilon= 2.08", "$dcosmo_rs file=hexadecane_25.pot"], + # "dmf": [" epsilon= 38.3", "$dcosmo_rs file="], # not in standard TM parameter folder! 
+ # } + if self.job["solvent"] not in ("gas", "gas-phase", None): + solvent_dcosmors = { + self.job["solvent"]: [ + f" epsilon= {censo_solvent_db[self.job['solvent']]['DC']}", + f"$dcosmo_rs file={censo_solvent_db[self.job['solvent']]['dcosmors'][1]}_25.pot", + ] + } + # handle solvents: + controlappend = [] + if self.job["sm"] == "dcosmors" and self.job["solvent"] != "gas": + try: + filename = solvent_dcosmors.get(self.job["solvent"])[1].split("=")[1] + except IndexError: + filename = "" + if not os.path.isfile( + os.path.join( + os.path.dirname( + os.path.dirname(os.path.dirname(shutil.which("ridft"))) + ), + "parameter/" + filename, + ) + ): + if os.path.isfile( + os.path.expanduser(os.path.join("~/.censo_assets/", filename)) + ): + tmp = solvent_dcosmors.get(self.job["solvent"], "") + tmp[4] = "$dcosmo_rs file=" + str( + os.path.expanduser(os.path.join("~/.censo_assets/", filename)) + ) + solvent_dcosmors[self.job["solvent"]] = tmp + else: + line = ( + "WARNING: DCOSMO-RS potential file not found!" + " Trying file without verification!" 
+ ) + # print(line) + self.job["internal_error"].append(line) + if self.job["solvent"] != "gas" and self.job["sm"] in ("cosmo", "dcosmors"): + if solvent_dcosmors.get(self.job["solvent"], "not found!") == "not found!": + print(f"ERROR: Solvent {self.job['solvent']} is not known for cefine!") + self.job["success"] = False + self.job["internal_error"].append("prep-failed") + return + else: + controlappend.append("$cosmo") + # write epsilon (dielectric constant) + controlappend.append(solvent_dcosmors.get(self.job["solvent"], "")[0]) + if self.job["jobtype"] not in ("opt-rot", "opt-rot_sp"): + controlappend.append(" cavity closed") + controlappend.append(" use_contcav") + controlappend.append(" nspa=272") + controlappend.append(" nsph=162") + controlappend.append("$cosmo_isorad") + # write parameterfile for dcosmors + if self.job["sm"] == "dcosmors": + controlappend.append( + solvent_dcosmors.get(self.job["solvent"], "")[1] + ) + if self.job["jobtype"] in ("opt-rot", "opt-rot_sp"): + controlappend.append("$scfinstab dynpol nm") + for i in self.job["freq_or"]: + controlappend.append(f" {i}") # e.g. 
589 + controlappend.append("$velocity gauge") + controlappend.append("$rpaconv 4") + + if dogcp: + if self.job["basis"] == "def2-SV(P)": + controlappend.append("$gcp dft/sv(p)") + else: + controlappend.append( + f"$gcp dft/{self.job['basis'].lower().replace('-', '')}" + ) + + # write to control file: + with open( + os.path.join(self.job["workdir"], "control"), + "r", + encoding=CODING, + newline=None, + ) as control: + tmp = control.readlines() + # check if dispersion is found + nodisp = True + replacewatm = "" + needatm = ("b97-3c", "b973c") + for line in tmp: + if "$disp" in line: + nodisp = False + if self.job["func"] in needatm: + if "$disp3 -bj -abc" not in line: + replacewatm = "$disp3 -bj -abc" + if nodisp and (self.job["func"] not in needatm) and not requestnovdw: + controlappend.append("$disp3 -bj") + elif nodisp and (self.job["func"] in needatm) and not requestnovdw: + controlappend.append("$disp3 -bj -abc") + if nodisp and requestnovdw: + replacewatm = " " + if controlappend: + with open( + os.path.join(self.job["workdir"], "control"), "w", newline=None + ) as newcontrol: + for line in tmp[:-1]: + if "$end" in line: + pass + if "$disp" in line and replacewatm: + newcontrol.write(replacewatm + "\n") + else: + newcontrol.write(line) + for line in controlappend: + newcontrol.write(line + "\n") + newcontrol.write("$end\n") + if self.job["copymos"]: + if self.job["unpaired"] > 0: + molist = ["alpha", "beta"] + else: + molist = ["mos"] + try: + for item in molist: + tmp_from = os.path.join( + "CONF" + str(self.id), self.job["copymos"], item + ) + tmp_to = os.path.join(self.job["workdir"], item) + shutil.copy(tmp_from, tmp_to) + except FileNotFoundError: + pass + for item in molist: + if ( + not os.path.isfile(os.path.join(self.job["workdir"], item)) + or os.stat(os.path.join(self.job["workdir"], item)).st_size == 0 + ): + print(f"Error: {item} is missing!") + # NMR part + if self.job["jobtype"] in ( + "couplings", + "couplings_sp", + "shieldings", + 
"shieldings_sp", + ): + with open( + os.path.join(self.job["workdir"], "control"), + "r", + encoding=CODING, + newline=None, + ) as control: + tmp = control.readlines() + with open( + os.path.join(self.job["workdir"], "control"), "w", newline=None + ) as newcontrol: + for line in tmp: + if "rpacor" in line: + rpacor = 10000 + try: + tmpval = float(line.strip().split()[-1]) + if tmpval > rpacor: + rpacor = tmpval + except (ValueError, IndexError): + pass + tmp[tmp.index(line)] = f"$rpacor {str(rpacor)} \n" + elif "$cosmo_isorad" in line: + tmp.pop(tmp.index(line)) + for line in tmp[:-1]: + if "$end" in line: + pass + else: + newcontrol.write(line) + newcontrol.write("$ncoupling\n") + newcontrol.write(" simple\n") + # fc sd pso dso nofcsdcross + newcontrol.write(" thr=0.0\n") + nucsel1 = "$nucsel " + nucsel2 = "$nucsel2 " + if self.job["h_active"]: + nucsel1 = nucsel1 + '"h" ' + nucsel2 = nucsel2 + '"h" ' + if self.job["c_active"]: + nucsel1 = nucsel1 + '"c" ' + nucsel2 = nucsel2 + '"c" ' + if self.job["f_active"]: + nucsel1 = nucsel1 + '"f" ' + nucsel2 = nucsel2 + '"f" ' + if self.job["si_active"]: + nucsel1 = nucsel1 + '"si" ' + nucsel2 = nucsel2 + '"si" ' + if self.job["p_active"]: + nucsel1 = nucsel1 + '"p" ' + nucsel2 = nucsel2 + '"p" ' + if any( + [ + self.job["h_active"], + self.job["c_active"], + self.job["c_active"], + self.job["si_active"], + self.job["p_active"], + ] + ): + newcontrol.write(nucsel1 + "\n") + newcontrol.write(nucsel2 + "\n") + else: + # don't write nucsel, every shielding, coupling will be calculated + pass + newcontrol.write("$rpaconv 8\n") + newcontrol.write("$end") + time.sleep(0.15) + + # ****************************end cefine************************************ + + def _sp(self, silent=False): + """ + Turbomole single-point calculation, needs previous cefine run + """ + if not self.job["onlyread"]: + if not silent: + print( + f"Running single-point in {last_folders(self.job['workdir'], 2):18}" + ) + with open( + 
os.path.join(self.job["workdir"], "ridft.out"), "w", newline=None + ) as outputfile: + subprocess.call( + ["ridft"], + shell=False, + stdin=None, + stderr=subprocess.STDOUT, + universal_newlines=False, + cwd=self.job["workdir"], + stdout=outputfile, + env=ENVIRON, + ) + time.sleep(0.02) + # check if scf is converged: + if os.path.isfile(os.path.join(self.job["workdir"], "ridft.out")): + with open( + os.path.join(self.job["workdir"], "ridft.out"), + "r", + encoding=CODING, + newline=None, + ) as inp: + stor = inp.readlines() + if " ENERGY CONVERGED !\n" not in stor: + print( + "ERROR: scf in {:18} not converged!".format( + last_folders(self.job["workdir"], 2) + ) + ) + self.job["success"] = False + self.job["energy"] = 0.0 + self.job["energy2"] = 0.0 + return + else: + print( + "WARNING: {} doesn't exist!".format( + os.path.join(self.job["workdir"], "ridft.out") + ) + ) + self.job["success"] = False + self.job["energy"] = 0.0 + self.job["energy2"] = 0.0 + return + if os.path.isfile(os.path.join(self.job["workdir"], "energy")): + with open( + os.path.join(self.job["workdir"], "energy"), + "r", + encoding=CODING, + newline=None, + ) as energy: + storage = energy.readlines() + try: + self.job["energy"] = float(storage[-2].split()[1]) + self.job["success"] = True + except ValueError: + print( + "ERROR while converting energy in: {:18}".format( + last_folders(self.job["workdir"], 2) + ) + ) + if self.job["jobtype"] == "sp_implicit": + self.job["energy2"] = 0.0 + else: + self.job["energy"] = 0.0 + self.job["success"] = False + + # ****************************end _sp*************************************** + + def _cosmors(self): + """ + Run COSMO-RS from within censo. + calculates directly in the workdir. folder COSMO has to be created + beforehand. 
+ energy --> gas phase scf energy + energy2 --> gsolv contribution + """ + if not self.job["onlyread"]: + print( + f"Running COSMO-RS calculation in " + f"{last_folders(self.job['workdir'], 3):18}" + ) + # parametrisation + if self.job["cosmorsparam"] == "fine": + pass + elif self.job["cosmorsparam"] == "normal": + if "FINE" in self.job["cosmorssetup"]: + ## normal cosmors + tmp = self.job["cosmorssetup"] + tmp = tmp.replace("_FINE", "") + self.job["cosmorssetup"] = tmp.replace("BP_TZVPD", "BP_TZVP") + # run two single-points: + if self.job["copymos"]: + if self.job["unpaired"] > 0: + molist = ["alpha", "beta"] + else: + molist = ["mos"] + try: + for item in molist: + tmp_from = os.path.join( + "CONF" + str(self.id), self.job["copymos"], item + ) + tmp_to = os.path.join(self.job["workdir"], item) + shutil.copy(tmp_from, tmp_to) + except FileNotFoundError: + pass + for item in molist: + if ( + not os.path.isfile(os.path.join(self.job["workdir"], item)) + or os.stat(os.path.join(self.job["workdir"], item)).st_size == 0 + ): + print(f"Error: {item} is missing!") + # first single-point in gas phase! + tmp_solvent = self.job["solvent"] + tmp_sm = self.job["solvent"] + self.job["solvent"] = "gas" + self.job["sm"] = "gas-phase" + self._prep_cefine() + if not self.job["success"]: + return + # running single-point in gas phase + self._sp(silent=True) + self.job["solvent"] = tmp_solvent + self.job["sm"] = tmp_sm + if not self.job["success"]: + print( + "ERROR: gas-phase single-point calculation failed in: " + f"{last_folders(self.job['workdir'], 3):18}" + ) + return + with open( + os.path.join(self.job["workdir"], "out.energy"), "w", newline=None + ) as out: + out.write(str(self.job["energy"]) + "\n") + gas_phase_energy = self.job["energy"] + self.job["energy"] = 0.0 + # running single-point in ideal conductor! 
+ with open( + os.path.join(self.job["workdir"], "control"), + "r", + encoding=CODING, + newline=None, + ) as inp: + tmp = inp.readlines() + with open( + os.path.join(self.job["workdir"], "control"), "w", newline=None + ) as out: + for line in tmp[:-1]: + out.write(line + "\n") + if self.job["cosmorsparam"] == "normal": + # normal + out.write("$cosmo \n") + out.write(" epsilon=infinity \n") + out.write(" use_contcav \n") + out.write(" cavity closed \n") + out.write(" nspa=272 \n") + out.write(" nsph=162 \n") + out.write("$cosmo_out file=out.cosmo \n") + out.write("$end \n") + else: + # fine + out.write("$cosmo \n") + out.write(" epsilon=infinity \n") + out.write(" use_contcav \n") + out.write(" cavity closed \n") + out.write(" nspa=272 \n") + out.write(" nsph=162 \n") + out.write("$cosmo_out file=out.cosmo \n") + # out.write("$cosmo_isorad \n") + # out.write("$cosmo_isodens \n") + out.write("$end \n") + self.job["success"] = False + self._sp(silent=True) + if not self.job["success"]: + print( + "ERROR: single-point in ideal conductor calculation failed in: " + f"{last_folders(self.job['workdir'], 3):18}" + ) + return + # info from .ensorc # replacement for cosmothermrc + # fdir=/software/cluster/COSMOthermX16/COSMOtherm/DATABASE-COSMO/BP-TZVP-COSMO autoc + # cosmors_solv = { + # "acetone": ["f = propanone.cosmo "], + # "h2o": ["f = h2o.cosmo "], + # "chcl3": ["f = chcl3.cosmo "], + # "ch2cl2": ["f = ch2cl2.cosmo "], + # "acetonitrile": ["f = acetonitrile_c.cosmo "], + # "dmso": ["f = dimethylsulfoxide.cosmo "], + # "methanol": ["f = methanol.cosmo "], + # "thf": ["f = thf.cosmo "], + # "toluene": ["f = toluene_c0.cosmo "], + # "octanol": ["f = 1-octanol "], + # "hexadecane": ["f = n-hexadecane "], + # "woctanol": ["f = h2o.cosmo ", "f = 1-octanol "], + # } + + if self.job["solvent"] not in ("gas", "gas-phase", None): + if self.job["solvent"] == "woctanol": + cosmors_solv = { + "woctanol": ["f = h2o.cosmo ", "f = 1-octanol.cosmo "] + } + else: + tmp_1 = 
os.path.splitext( + censo_solvent_db[self.job["solvent"]]["cosmors"][1] + )[0] + filename = f"{tmp_1}.cosmo" + cosmors_solv = {f"{self.job['solvent']}": [f"f = {filename} "]} + + mixture = {"woctanol": ["0.27 0.73"]} + if self.job["cosmorsparam"] == "fine": + solv_data = os.path.join( + os.path.split(self.job["cosmorssetup"].split()[5].strip('"'))[0], + "DATABASE-COSMO/BP-TZVPD-FINE", + ) + else: + solv_data = os.path.join( + os.path.split(self.job["cosmorssetup"].split()[5].strip('"'))[0], + "DATABASE-COSMO/BP-TZVP-COSMO", + ) + # test = ['ctd = BP_TZVP_C30_1601.ctd cdir = "/software/cluster/COSMOthermX16/COSMOtherm/CTDATA-FILES"'] + with open( + os.path.join(self.job["workdir"], "cosmotherm.inp"), "w", newline=None + ) as out: + out.write(self.job["cosmorssetup"] + "\n") + # write from ensorc + out.write("EFILE VPFILE \n") + out.write("!!\n") # for jobname in cosmors + if len(cosmors_solv[self.job["solvent"]]) > 1: + mix = mixture[self.job["solvent"]][0] + for line in cosmors_solv[self.job["solvent"]]: + out.write(line + "fdir=" + solv_data + " autoc \n") + elif len(cosmors_solv[self.job["solvent"]]) == 1: + mix = "1.0 0.0" + out.write( + cosmors_solv[self.job["solvent"]][0] + + " fdir=" + + solv_data + + " autoc \n" + ) + out.write("f = out.cosmo \n") + + if self.job["trange"]: + tmp1 = self.job["trange"] + tinside = False + for temp in tmp1: + if isclose(self.job["temperature"], temp, abs_tol=0.6): + tinside = True + if not tinside: + tmp1.append(self.job["temperature"]) + else: + tmp1 = [self.job["temperature"]] + tlist = [str("{:.2f}".format(i - 273.15)) for i in tmp1] + # henry = "henry xh={ "+mix+" } tc=25.0 Gsolv" + for i in tlist: + henry = "henry xh={ " + mix + " } tc=" + i + " Gsolv" + out.write(henry + "\n") + time.sleep(0.01) + # running COSMOtherm + with open( + os.path.join(self.job["workdir"], "cosmotherm.out"), "w", newline=None + ) as outputfile: + subprocess.call( + ["cosmotherm", "cosmotherm.inp"], + shell=False, + stdin=None, + 
stderr=subprocess.STDOUT, + universal_newlines=False, + cwd=self.job["workdir"], + stdout=outputfile, + env=ENVIRON, + ) + time.sleep(0.1) + # get T and Gsolv for version > cosmothermX16 + ## volumework: + R = 1.987203585e-03 # kcal/(mol*K) + videal = ( + 24.789561955 / 298.15 + ) # molar volume for ideal gas at 298.15 K 100.0 kPa + gsolvt = {} + try: + with open( + os.path.join(self.job["workdir"], "cosmotherm.tab"), + "r", + encoding=CODING, + newline=None, + ) as inp: + stor = inp.readlines() + for line in stor: + vwork = 0 + if "T=" in line: + T = float(line.split()[5]) + vwork = R * T * math.log(videal * T) + elif " out " in line: + gsolvt[T] = float(line.split()[-1]) / AU2KCAL + vwork / AU2KCAL + self.job["erange1"] = gsolvt + except (FileNotFoundError, ValueError): + print( + "ERROR: cosmotherm.tab was not written, this error can be " + "due to a missing licensefile information, or wrong path " + "to the COSMO-RS Database." + ) + self.job["energy"] = 0.0 + self.job["energy2"] = 0.0 + self.job["erange1"] = {} + self.job["success"] = False + return + except IndexError: + print("ERROR: IndexERROR in cosmotherm.tab!") + self.job["energy"] = 0.0 + self.job["energy2"] = 0.0 + self.job["erange1"] = {} + self.job["success"] = False + return + # cosmothermrd + if ( + os.stat(os.path.join(self.job["workdir"], "cosmotherm.tab")).st_size + == 0 + ): + print( + "ERROR: cosmotherm.tab was not written, this error can be " + "due to a missing licensefile information, or wrong path " + "to the COSMO-RS Database." 
+ ) + self.job["energy"] = 0.0 + self.job["energy2"] = 0.0 + self.job["erange1"] = {} + self.job["success"] = False + gsolv_out = 0.0 + for temp in gsolvt.keys(): + if isclose(self.job["temperature"], temp, abs_tol=0.6): + gsolv_out = gsolvt[temp] + temp = float(self.job["temperature"]) + ## volumework: + R = 1.987203585e-03 # kcal/(mol*K) + videal = ( + 24.789561955 / 298.15 + ) # molar volume for ideal gas at 298.15 K 100.0 kPa + volwork = R * temp * math.log(videal * temp) + + with open( + os.path.join(os.path.dirname(self.job["workdir"]), "cosmors.out"), + "w", + newline=None, + ) as out: + out.write( + "This is cosmothermrd (python version in ENSO) (SG,FB,SAW, 06/18)\n" + ) + out.write("final thermochemical solvation properties in kcal/mol\n") + out.write( + "----------------------------------------------------------\n" + ) + out.write( + " Gsolv({} K)= {:10.3f}\n".format( + temp, gsolv_out * AU2KCAL - volwork + ) + ) + out.write(" VWork({} K)= {:10.3f}\n".format(temp, volwork)) + out.write( + " Gsolv+VWork({} K)= {:10.3f}\n".format( + temp, (gsolv_out * AU2KCAL) # volwork already included! 
+ ) + ) + time.sleep(0.01) + self.job["energy"] = gas_phase_energy + self.job["energy2"] = gsolv_out # VOLWORK INCLUDED + self.job["erange1"][self.job["temperature"]] = gsolv_out # VOLWORK INCLUDED + self.job["success"] = True + + # ********************************end _cosmors*********************************** + def _xtbopt(self): + """ + Turbomole optimization using the ANCOPT optimizer implemented in xTB + """ + error_logical = False + if self.job["fullopt"]: + output = "opt-part2.out" + else: + output = "opt-part1.out" + if not self.job["onlyread"]: + print(f"Running optimization in {last_folders(self.job['workdir'], 2):18}") + files = [ + "xtbrestart", + "xtbtopo.mol", + "xcontrol-inp", + "wbo", + "charges", + "gfnff_topo", + ] + for file in files: + if os.path.isfile(os.path.join(self.job["workdir"], file)): + os.remove(os.path.join(self.job["workdir"], file)) + + callargs = [ + self.job["xtb_driver_path"], + "coord", + "--opt", + self.job["optlevel"], + "--tm", + ] + with open( + os.path.join(self.job["workdir"], "opt.inp"), "w", newline=None + ) as out: + out.write("$opt \n") + if ( + self.job["optcycles"] is not None + and float(self.job["optcycles"]) > 0 + ): + out.write(f"maxcycle={str(self.job['optcycles'])} \n") + out.write(f"microcycle={str(self.job['optcycles'])} \n") + out.write("average conv=true \n") + out.write(f"hlow={self.job.get('hlow', 0.01)} \n") + out.write("s6=30.00 \n") + # remove unnecessary sp/gradient call in xTB + out.write("engine=lbfgs\n") + out.write("$end \n") + callargs.append("-I") + callargs.append("opt.inp") + time.sleep(0.02) + with open( + os.path.join(self.job["workdir"], output), "w", newline=None + ) as outputfile: + returncode = subprocess.call( + callargs, + shell=False, + stdin=None, + stderr=subprocess.STDOUT, + universal_newlines=False, + cwd=self.job["workdir"], + stdout=outputfile, + env=ENVIRON, + ) + if returncode != 0: + error_logical = True + print( + "ERROR: optimization in {:18} not converged".format( + 
last_folders(self.job["workdir"], 2) + ) + ) + time.sleep(0.02) + # check if converged: + if os.path.isfile(os.path.join(self.job["workdir"], output)): + with open( + os.path.join(self.job["workdir"], output), + "r", + encoding=CODING, + newline=None, + ) as inp: + stor = inp.readlines() + for line in stor: + if ( + "external code error" in line + or "|grad| > 500, something is totally wrong!" in line + or "abnormal termination of xtb" in line + ): + print( + "ERROR: optimization in {:18} not converged".format( + last_folders(self.job["workdir"], 2) + ) + ) + error_logical = True + break + elif " FAILED TO CONVERGE GEOMETRY " in line: + self.job["cycles"] += int(line.split()[7]) + self.job["converged"] = False + elif "*** GEOMETRY OPTIMIZATION CONVERGED AFTER " in line: + self.job["cycles"] += int(line.split()[5]) + self.job["converged"] = True + with open( + os.path.join(self.job["workdir"], output), + "r", + encoding=CODING, + newline=None, + ) as inp: + for line in inp: + if "av. E: " in line: + # self.job["ecyc"].append(float(line.split("Eh")[0].split()[-1])) + self.job["ecyc"].append(float(line.split("->")[-1])) + if " :: gradient norm " in line: + self.job["grad_norm"] = float(line.split()[3]) + else: + print( + "WARNING: {} doesn't exist!".format( + os.path.join(self.job["workdir"], output) + ) + ) + error_logical = True + if not error_logical: + try: + self.job["energy"] = self.job["ecyc"][-1] + self.job["success"] = True + except: + error_logical = True + if error_logical: + self.job["energy"] = 0.0 + self.job["success"] = False + self.job["converged"] = False + self.job["ecyc"] = [] + self.job["grad_norm"] = 10.0 + + ##### VERSION BEFORE AVERAGING KEEP FOR NOW + # def _xtbopt(self): + # """ + # Turbomole optimization using the ANCOPT optimizer implemented in xTB + # """ + # error_logical = False + # if self.job["fullopt"]: + # output = "opt-part2.out" + # else: + # output = "opt-part1.out" + # if not self.job["onlyread"]: + # print(f"Running optimization 
in {last_folders(self.job['workdir'], 2):18}") + # files = [ + # "xtbrestart", + # "xtbtopo.mol", + # "xcontrol-inp", + # "wbo", + # "charges", + # "gfnff_topo", + # ] + # for file in files: + # if os.path.isfile(os.path.join(self.job["workdir"], file)): + # os.remove(os.path.join(self.job["workdir"], file)) + + # callargs = [ + # self.job["xtb_driver_path"], + # "coord", + # "--opt", + # self.job["optlevel"], + # "--tm", + # ] + # with open( + # os.path.join(self.job["workdir"], "opt.inp"), "w", newline=None + # ) as out: + # out.write("$opt \n") + # if self.job["optcycles"] is not None and float(self.job["optcycles"]) > 0: + # out.write(f"maxcycle={str(self.job['optcycles'])} \n") + # out.write(f"microcycle={str(self.job['optcycles'])} \n") + # out.write("average conv=true \n") + # out.write(f"hlow={self.job.get('hlow', 0.01)} \n") + # out.write("s6=30.00 \n") + # # remove unnecessary sp/gradient call in xTB + # out.write("engine=lbfgs\n") + # out.write("$end \n") + # callargs.append("-I") + # callargs.append("opt.inp") + # time.sleep(0.02) + # with open( + # os.path.join(self.job["workdir"], output), "w", newline=None + # ) as outputfile: + # returncode = subprocess.call( + # callargs, + # shell=False, + # stdin=None, + # stderr=subprocess.STDOUT, + # universal_newlines=False, + # cwd=self.job["workdir"], + # stdout=outputfile, + # env=ENVIRON, + # ) + # if returncode != 0: + # error_logical = True + # print( + # "ERROR: optimization in {:18} not converged".format( + # last_folders(self.job["workdir"], 2) + # ) + # ) + # time.sleep(0.02) + # # check if converged: + # if os.path.isfile(os.path.join(self.job["workdir"], output)): + # with open( + # os.path.join(self.job["workdir"], output), + # "r", + # encoding=CODING, + # newline=None, + # ) as inp: + # stor = inp.readlines() + # for line in stor: + # if ( + # "external code error" in line + # or "|grad| > 500, something is totally wrong!" 
in line + # or "abnormal termination of xtb" in line + # ): + # print( + # "ERROR: optimization in {:18} not converged".format( + # last_folders(self.job["workdir"], 2) + # ) + # ) + # error_logical = True + # break + # elif " FAILED TO CONVERGE GEOMETRY " in line: + # self.job["cycles"] += int(line.split()[7]) + # # self.job['cycles'] = int(line.split()[7]) + self.job['cycles'] + # self.job["converged"] = False + # elif "*** GEOMETRY OPTIMIZATION CONVERGED AFTER " in line: + # self.job["cycles"] += int(line.split()[5]) + # # self.job['cycles'] = int(line.split()[5]) + self.job['cycles'] + # self.job["converged"] = True + # with open( + # os.path.join(self.job["workdir"], output), + # "r", + # encoding=CODING, + # newline=None, + # ) as inp: + # for line in inp: + # if "total energy :" in line and not "gain" in line: + # self.job["ecyc"].append(float(line.split("Eh")[0].split()[-1])) + # else: + # print( + # "WARNING: {} doesn't exist!".format( + # os.path.join(self.job["workdir"], output) + # ) + # ) + # error_logical = True + # if os.path.isfile(os.path.join(self.job["workdir"], "energy")): + # with open( + # os.path.join(self.job["workdir"], "energy"), + # "r", + # encoding=CODING, + # newline=None, + # ) as energy: + # storage = energy.readlines() + # try: + # self.job["energy"] = float(storage[-2].split()[1]) + # self.job["success"] = True + # except ValueError: + # print( + # "ERROR while converting energy in {:18}".format( + # last_folders(self.job["workdir"], 2) + # ) + # ) + # else: + # error_logical = True + # if error_logical: + # self.job["energy"] = 0.0 + # self.job["success"] = False + # self.job["converged"] = False + # self.job["ecyc"] = [] + + # ********************************end _xtbopt*********************************** + + def _opt(self): + """ + Turbomole optimization using JOBEX, with adapted thresholds! + """ + pass + + def _nmr_coupling(self): + """ + Turbomole coupling constant calculation. 
def _nmr_shielding(self):
    """
    Turbomole shielding constant calculation.

    Rewrites the ``control`` file in ``self.job["workdir"]`` (the gridsize
    line is replaced by an m5 entry, ``$disp`` lines are dropped when
    ``self.job["func"]`` is kt1 or kt2), runs ``self.job["progpath"]`` with
    its output redirected to ``mpshift.out`` and sets
    ``self.job["success"]`` depending on whether the "all done" marker is
    found in that output.
    """
    print(
        "Running shielding calculation in {}".format(
            last_folders(self.job["workdir"], 2)
        )
    )
    control_path = os.path.join(self.job["workdir"], "control")
    output_path = os.path.join(self.job["workdir"], "mpshift.out")
    # update grid to m5!
    with open(control_path, "r", newline=None) as inp:
        tmp = inp.readlines()
    with open(control_path, "w", newline=None) as out:
        for line in tmp:
            if "gridsize" in line:
                # replace the grid line; it must not fall through to the
                # branch below, otherwise the original gridsize entry is
                # written right after the new one
                out.write(" gridsize m5 \n")
            elif "$disp" in line and self.job["func"] in ("kt2", "kt1"):
                # $disp lines are not carried over for kt1/kt2
                continue
            else:
                out.write(line + "\n")
    time.sleep(0.02)

    with open(output_path, "w", newline=None) as outputfile:
        subprocess.call(
            [self.job["progpath"], "-smpcpus", str(self.job["omp"])],
            shell=False,
            stdin=None,
            stderr=subprocess.STDOUT,
            universal_newlines=False,
            cwd=self.job["workdir"],
            stdout=outputfile,
            env=ENVIRON,
        )
    time.sleep(0.02)
    # check if shift calculation is converged:
    with open(output_path, "r", encoding=CODING, newline=None) as inp:
        stor = inp.readlines()
    if any(" **** mpshift : all done ****" in line for line in stor):
        self.job["success"] = True
    else:
        print(
            "ERROR: shielding calculation failed in {:18}".format(
                last_folders(self.job["workdir"], 2)
            )
        )
        self.job["success"] = False
+ """ + fnameshield = "mpshift.out" + atom = [] + sigma = [] + try: + with open( + os.path.join(self.job["workdir"], fnameshield), + "r", + encoding=CODING, + newline=None, + ) as inp: + data = inp.readlines() + for line in data: + if ">>>>> DFT MAGNETIC SHIELDINGS <<<<<" in line: + start = data.index(line) + for line in data[start:]: + if "ATOM" in line: + splitted = line.split() + atom.append(int(splitted[2])) + sigma.append(float(splitted[4])) + except FileNotFoundError: + print( + "Missing file: {} in {}, Shielding constants are not written.".format( + fnameshield, last_folders(self.job["workdir"], 2) + ) + ) + self.job["success"] = False + except ValueError: + print("ERROR: ValueError in generic_output, nmrprop.dat can be flawed !") + self.job["success"] = False + self.job["success"] = True + fnamecoupl = "escf.out" + atom1 = [] + atom2 = [] + jab = [] + try: + with open( + os.path.join(self.job["workdir"], fnamecoupl), + "r", + encoding=CODING, + newline=None, + ) as inp: + data = inp.readlines() + for line in data: + if "Nuclear coupling constants" in line: + start = int(data.index(line)) + 3 + if "-----------------------------------" in line: + end = int(data.index(line)) + for line in data[start:end]: + if len(line.split()) in (6, 7): + splitted = line.split() + atom1.append(int(splitted[1])) + atom2.append(int(splitted[4].split(":")[0])) + jab.append(float(splitted[5])) + except FileNotFoundError: + print( + "Missing file: {} in {}, Coupling constants are not written.".format( + fnamecoupl, last_folders(self.job["workdir"], 2) + ) + ) + self.job["success"] = False + except ValueError: + print("ERROR: ValueError in generic_output, nmrprop.dat can be flawed") + self.job["success"] = False + self.job["success"] = True + with open( + os.path.join(self.job["workdir"], "nmrprop.dat"), "w", newline=None + ) as out: + s = sorted(zip(atom, sigma)) + atom, sigma = map(list, zip(*s)) + self.shieldings = dict(zip(atom, sigma)) + for i in range(len(atom)): + 
out.write("{:{digits}} {}\n".format(atom[i], sigma[i], digits=4)) + for i in range(self.job["nat"] - len(atom)): + out.write("\n") + for i in range(len(atom1)): + out.write( + "{:{digits}} {:{digits}} {}\n".format( + atom1[i], atom2[i], jab[i], digits=4 + ) + ) + time.sleep(0.02) + + def _optrot(self, silent=False): + """ + calculate optical rotation + """ + if not self.job["onlyread"]: + with open( + os.path.join(self.job["workdir"], "control"), "r", newline=None + ) as inp: + tmp = inp.readlines() + with open( + os.path.join(self.job["workdir"], "control"), "w", newline=None + ) as out: + for line in tmp: + if "functional" in line: + out.write(f" functional {self.job['func2']} \n") + elif "$disp" in line: + pass + else: + out.write(line + "\n") + + if not silent: + print( + f"Running optical-rotation calculation in {last_folders(self.job['workdir'], 2):18}" + ) + files = ["dipl_a", "dipole_a", "rhs_a"] + for file in files: + if os.path.isfile(os.path.join(self.job["workdir"], file)): + os.remove(os.path.join(self.job["workdir"], file)) + with open( + os.path.join(self.job["workdir"], "escf.out"), "w", newline=None + ) as outputfile: + subprocess.call( + [self.job["progpath"]], + shell=False, + stdin=None, + stderr=subprocess.STDOUT, + universal_newlines=False, + cwd=self.job["workdir"], + stdout=outputfile, + env=ENVIRON, + ) + time.sleep(0.02) + # check if scf is converged: + if os.path.isfile(os.path.join(self.job["workdir"], "escf.out")): + with open( + os.path.join(self.job["workdir"], "escf.out"), + "r", + encoding=CODING, + newline=None, + ) as inp: + stor = inp.readlines() + # -------------testnew + escf_ok = False + length_rep = False # (length representation) + velocity_rep = False # (velocity representation) + do_read_length = False + do_read_velocity = True + hybriddfa = ( + "pbe0", + "pw6b95", + "wb97x-d3", + "cam-b3lyp", + "b3-lyp", + "pbeh-3c", + "m06x", + "bh-lyp", + "tpssh", + ) + if self.job["func2"] in hybriddfa: + do_read_length = True + dum = 
0 + frequencies = self.job["freq_or"] + frequencies.sort(reverse=True) + for line in stor: + if "escf ended normally" in line: + escf_ok = True + if " Frequency / nm: " in line: + freq = float(line.strip().split()[-1]) + if isclose(frequencies[dum], freq, abs_tol=0.6): + freq = float(frequencies[dum]) + else: + print("Can't find freq in nm!") + dum += 1 + if " specific rotation [alpha] in deg*[dm(g/cc)]^(-1)" in line: + if not length_rep: + length_rep = True + velocity_rep = False + elif length_rep: + velocity_rep = True + length_rep = False + if velocity_rep and do_read_velocity: + self.job["energy"] = 0.0 + self.job["success"] = True + self.job["energy2"] = 0.0 + self.job["erange1"][freq] = float(line.split("(-1)")[-1]) + elif length_rep and do_read_length: + print("Using length representation.") + self.job["energy"] = 0.0 + self.job["success"] = True + self.job["energy2"] = 0.0 + self.job["erange1"][freq] = float(line.split("(-1)")[-1]) + for freq in self.job["freq_or"]: + if freq not in self.job["erange1"].keys(): + escf_ok = False + if not escf_ok: + print( + "ERROR: in escf.out {:18} not converged!".format( + last_folders(self.job["workdir"], 2) + ) + ) + self.job["success"] = False + self.job["energy"] = 0.0 + self.job["energy2"] = 0.0 + self.job["erange1"] = {} + else: + print( + "WARNING: {} doesn't exist!".format( + os.path.join(self.job["workdir"], "escf.out") + ) + ) + self.job["success"] = False + self.job["energy"] = 0.0 + self.job["energy2"] = 0.0 + self.job["erange1"] = {} + + def execute(self): + """ + Choose what to execute for the jobtype + """ + if self.job["jobtype"] == "rrhoxtb": + self._xtbrrho() + elif self.job["jobtype"] == "xtb_sp": + self._xtb_sp() + elif self.job["jobtype"] == "prep": + self._prep_cefine() + elif self.job["jobtype"] in ("sp", "sp_implicit"): + if self.job["prepinfo"]: + # do cefine first + self._prep_cefine() + if not self.job["success"]: + return + self._sp() + elif self.job["jobtype"] == "cosmors": + self._cosmors() + 
elif self.job["jobtype"] == "xtbopt": + self._xtbopt() + elif self.job["jobtype"] == "genericout": + self._genericoutput() + elif self.job["jobtype"] in ("couplings", "couplings_sp"): + if self.job["prepinfo"]: + self._prep_cefine() + if not self.job["success"]: + return + if self.job["jobtype"] == "couplings_sp": + self._sp(silent=False) + if not self.job["success"]: + return + else: + try: + tmp_from = os.path.join(self.job["workdir"], "mos") + tmp_to = os.path.join(self.job["workdir"], "mos_J") + shutil.copy(tmp_from, tmp_to) + except FileNotFoundError: + pass + self._nmr_coupling() + elif self.job["jobtype"] in ("shieldings", "shieldings_sp"): + if self.job["prepinfo"]: + self._prep_cefine() + # print('performed cefine') + if not self.job["success"]: + return + if self.job["copymos"]: + # use mos as starting mos if basisJ == basisS but + # funcJ != funcS + try: + tmp_from = os.path.join(self.job["workdir"], "mos_J") + tmp_to = os.path.join(self.job["workdir"], "mos") + shutil.copy(tmp_from, tmp_to) + except FileNotFoundError: + pass + # print("copied mos") + if self.job["jobtype"] == "shieldings_sp": + self._sp(silent=False) + # print("ran sp") + if not self.job["success"]: + return + # print("running shieldings") + self._nmr_shielding() + elif self.job["jobtype"] in ("gbsa_gsolv", "alpb_gsolv"): + if self.job["prepinfo"]: + tmp_solvent = self.job["solvent"] + self.job["solvent"] = "gas" + self._prep_cefine() + if not self.job["success"]: + return + self._sp() + if not self.job["success"]: + return + self.job["solvent"] = tmp_solvent + self._xtb_gsolv() + elif self.job["jobtype"] in ("opt-rot", "opt-rot_sp"): + if self.job["prepinfo"]: + self._prep_cefine() + if not self.job["success"]: + return + if self.job["jobtype"] == "opt-rot_sp": + self._sp() + if not self.job["success"]: + return + self._optrot() + else: + print(f"JOBTYPE {self.job['jobtype']} UNKNOWN!") diff --git a/censo_qm/utilities.py b/censo_qm/utilities.py new file mode 100755 index 
def print(*args, **kwargs):
    """
    patch print to always flush

    Forwards to the builtin print (available as ``print_orig``). Only the
    keywords ``sep``, ``end``, ``file`` and ``flush`` are passed on; any
    other keyword is ignored. ``flush`` defaults to True, but an explicit
    ``flush=...`` given by the caller is honoured.
    """
    options = {"sep": " ", "end": "\n", "file": None, "flush": True}
    for key, value in kwargs.items():
        if key in options:
            # the former code did ``key = value`` for "flush", so the
            # caller's choice never reached print_orig
            options[key] = value
    print_orig(*args, **options)
+ print("{:>{digits}}, ".format(str(item), digits=maxlen), end="") + else: + print("{:>{digits}}".format(str(item), digits=maxlen), end="") + else: + print("{:>{digits}}".format(str(item), digits=maxlen)) + length = 0 + if length != 0: + print("\n") + + +def t2x(path, writexyz=False, outfile="original.xyz"): + """convert TURBOMOLE coord file to xyz data and/or write *.xyz ouput + + - path [abs. path] does not need to include the filename coord + - writexyz [bool] default=False, directly write to outfile + - outfile [filename] default = 'original.xyz' filename of xyz file which + is written into the same directory as + returns: + - coordxyz --> list of strings including atom x y z information + - number of atoms + """ + if not os.path.basename(path) == "coord": + path = os.path.join(path, "coord") + with open(path, "r", encoding=CODING, newline=None) as f: + coord = f.readlines() + x = [] + y = [] + z = [] + atom = [] + for line in coord[1:]: + if "$" in line: # stop at $end ... + break + x.append(float(line.split()[0]) * BOHR2ANG) + y.append(float(line.split()[1]) * BOHR2ANG) + z.append(float(line.split()[2]) * BOHR2ANG) + atom.append(str(line.split()[3].lower())) + # natoms = int(len(coord[1:-1])) # unused + coordxyz = [] + for i in range(len(x)): + coordxyz.append( + "{:3} {: .10f} {: .10f} {: .10f}".format( + atom[i][0].upper() + atom[i][1:], x[i], y[i], z[i] + ) + ) + if writexyz: + with open( + os.path.join(os.path.split(path)[0], outfile), + "w", + encoding=CODING, + newline=None, + ) as out: + out.write(str(len(coordxyz)) + "\n\n") + for line in coordxyz: + out.write(line + "\n") + return coordxyz, int(len(coordxyz)) + + +def x2t(path, infile="inp.xyz"): + """convert file inp.xyz to TURBOMOLE coord file""" + if ".xyz" not in os.path.basename(path): + path = os.path.join(path, infile) + with open(path, "r", encoding=CODING, newline=None) as f: + xyz = f.readlines() + atom = [] + x = [] + y = [] + z = [] + for line in xyz[2:]: + 
def write_trj(
    results, cwd, outpath, optfolder, nat, attribute, overwrite=False, *args, **kwargs
):
    """
    Write trajectory (multiple xyz geometries) to file.

    - results [list] conformer objects; uses conf.id, conf.calc_free_energy
                     and the attribute named by `attribute`
    - cwd [path] directory which contains the CONFx folders
    - outpath [path] trajectory file, data is appended
    - optfolder [str] folder below CONFx which holds the coord file
    - nat [int] not used, the atom count is taken from each coord file
    - attribute [str] name of the conformer attribute written as G(CENSO)
    - overwrite [bool] remove an already existing outpath first
    - kwargs: 'rrho' and 'energy' are handed to conf.calc_free_energy,
              both default to None
    """
    if overwrite and os.path.isfile(outpath):
        os.remove(outpath)
    rrho = kwargs.get("rrho")
    energy = kwargs.get("energy")
    try:
        with open(outpath, "a", encoding=CODING, newline=None) as out:
            for conf in results:
                conf_xyz, conf_nat = t2x(
                    os.path.join(cwd, "CONF" + str(conf.id), optfolder)
                )
                ### coordinates in xyz
                out.write(" {}\n".format(conf_nat))
                xtbfree = conf.calc_free_energy(
                    e=energy, solv=None, rrho=rrho, out=True
                )
                if xtbfree is not None:
                    xtbfree = f"{xtbfree:20.8f}"
                out.write(
                    f"G(CENSO)= {getattr(conf, attribute):20.8f}"
                    f" G(xTB)= {xtbfree}"
                    f" !CONF{str(conf.id)}\n"
                )
                for line in conf_xyz:
                    out.write(line + "\n")
    # NOTE(review): opening in append mode does not raise FileExistsError;
    # FileNotFoundError may have been intended here -- confirm before changing.
    except (FileExistsError, ValueError):
        print("Could not write trajectory: {}.".format(last_folders(outpath, 1)))


def check_for_float(line):
    """ Go through line and check for float, return first float

    The line is split on whitespace; the first element accepted by float()
    is returned. Returns None if no element can be converted.
    """
    for element in line.strip().split():
        try:
            return float(element)
        except ValueError:
            continue
    return None
def get_energy_from_ensemble(path, config, conformers):
    """
    Get energies from the ensemble inputfile and assign xtb_energy and
    rel_xtb_energy

    - path [path] ensemble file; each structure occupies config.nat + 2 lines
                  and the energy is searched on the second line of a structure
    - config uses config.maxconf, config.nat and config.nconf
    - conformers [list] conformer objects, sorted in place by int(conf.id)

    returns the conformers, or None if no energy could be read at all
    """
    with open(path, "r", encoding=CODING, newline=None) as inp:
        data = inp.readlines()
    if config.maxconf * (config.nat + 2) > len(data):
        print(
            f"ERROR: Either the number of conformers ({config.nconf}) "
            f"or the number of atoms ({config.nat}) is wrong!"
        )
    # calc energy and rel energy:
    energies = {}
    conformers.sort(key=lambda x: int(x.id))
    for conf in conformers:
        energies[conf.id] = check_for_float(
            data[(conf.id - 1) * (config.nat + 2) + 1]
        )
    try:
        lowest = float(min([i for i in energies.values() if i is not None]))
    except (ValueError, TypeError):
        print("WARNING: Can't calculate rel_xtb_energy!")
        return
    for conf in conformers:
        try:
            conf.xtb_energy = energies[conf.id]
            conf.rel_xtb_energy = (energies[conf.id] - lowest) * AU2KCAL
        # the exception needs its own name: binding it to the name of the
        # energy dict would delete the dict when the except clause is left
        except (ValueError, TypeError) as error:
            print(error)
    return conformers
def splitting(item):
    """
    Used in move_recursively.

    Sort key: returns the integer found after the last '.' of item
    (e.g. 'file.3' --> 3) and 0 if there is no '.' or if the part behind
    the last '.' is not an integer (e.g. 'file.save').
    """
    _, dot, suffix = item.rpartition(".")
    if not dot:
        # no '.' in the name at all
        return 0
    try:
        return int(suffix)
    except ValueError:
        return 0
def calc_boltzmannweights(confs, property, T):
    """
    Calculate Boltzmannweights:
    - confs [list] list with conformer objects
    - property [str] e.g. free_energy of conformer
    - T [float] temperature at which the Boltzmann weight has to be evaluated

    The weight of a conformer is
        gi * exp(-(value - min_value) * AU2J / (KB * T)) / sum over all confs
    where value is the attribute named by property and gi is the attribute
    'gi' of the conformer (1.0 if missing). The result is stored in
    conf.bm_weight.

    returns confs
    """
    if len(confs) == 1:
        confs[0].bm_weight = 1.0
        return confs
    try:
        T = float(T)
    except (ValueError, TypeError):
        T = 298.15  # K
        print(f"Temperature can not be converted and is therfore set to T = {T} K.")
    if T == 0:
        T += 0.00001  # avoid division by zero
    values = [getattr(conf, property, None) for conf in confs]
    try:
        minfree = min([value for value in values if value is not None])
    except ValueError:
        # no conformer provides the requested property, nothing to evaluate
        print("ERROR: Boltzmann weight can not be calculated!")
        return confs
    factors = [
        getattr(conf, "gi", 1.0)
        * math.exp(-((getattr(conf, property) - minfree) * AU2J) / (KB * T))
        for conf in confs
    ]
    bsum = 0.0
    for factor in factors:
        bsum += factor
    for conf, factor in zip(confs, factors):
        conf.bm_weight = factor / bsum
    return confs


def new_folders(cwd, conflist, foldername, save_errors, store_confs):
    """
    create folders for all conformers in conflist

    - cwd [path] directory in which the CONFx folders are created
    - conflist [list] conformer objects (conf.id is used), modified in place
    - foldername [str] name of the folder below CONFx
    - save_errors [list] receives a message for every removed conformer
    - store_confs [list] receives every conformer whose folder is missing

    returns save_errors, store_confs, conflist
    """
    # iterate over a copy, because conformers are removed from conflist
    for conf in list(conflist):
        tmp_dir = os.path.join(cwd, "CONF" + str(conf.id), foldername)
        try:
            os.makedirs(tmp_dir, exist_ok=True)
        except (OSError, ValueError) as e:
            print(e)
            if not os.path.isdir(tmp_dir):
                print(f"ERROR: Could not create folder for CONF{conf.id}!")
                print(f"CONF{conf.id} is removed, because IO failed!")
                save_errors.append(f"CONF{conf.id} was removed, " "because IO failed!")
                store_confs.append(conflist.pop(conflist.index(conf)))
    print("Constructed folders!")
    return save_errors, store_confs, conflist
def do_md5(path):
    """
    Calculate md5 of file to identify if restart happened on the same file!
    Input is buffered into smaller sizes to ease on memory consumption.

    returns the hexadecimal digest
    raises FileNotFoundError if path is not an existing file
    """
    BUF_SIZE = 65536
    if not os.path.isfile(path):
        raise FileNotFoundError(path)
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        for data in iter(lambda: f.read(BUF_SIZE), b""):
            md5.update(data)
    return md5.hexdigest()


def rank_simple(vector):
    """
    needed to rank vectors

    returns the indices of vector ordered by ascending value
    """
    return sorted(range(len(vector)), key=vector.__getitem__)


def rankdata(a):
    """
    rank vectors like in numpy

    Ranks start at 1; equal values share the average of their ranks.
    """
    n = len(a)
    ivec = rank_simple(a)
    svec = [a[rank] for rank in ivec]
    sumranks = 0
    dupcount = 0
    newarray = [0] * n
    for i in range(n):
        sumranks += i
        dupcount += 1
        # end of a run of equal values: hand out the averaged rank
        if i == n - 1 or svec[i] != svec[i + 1]:
            averank = sumranks / float(dupcount) + 1
            for j in range(i - dupcount + 1, i + 1):
                newarray[ivec[j]] = averank
            sumranks = 0
            dupcount = 0
    return newarray


def pearson(A, B):
    """
    Calculate pearson correlation coefficient

    returns 0.0 (after printing a warning) if the coefficient is not defined,
    i.e. for less than two data points or if one vector has no variance.
    """
    if len(A) != len(B):
        print("ERROR in PEARSON lists are not of equal length!")
    n = float(len(A))
    if n < 2:
        # mean and sample standard deviation need at least two points
        print("WARNING: ", "less than two data points")
        return 0.0
    muA = sum(A) / n
    muB = sum(B) / n
    diffA = [x - muA for x in A]
    diffB = [x - muB for x in B]
    stdA = math.sqrt((1 / (n - 1)) * sum([d * d for d in diffA]))
    stdB = math.sqrt((1 / (n - 1)) * sum([d * d for d in diffB]))
    try:
        return (sum([A[i] * B[i] for i in range(int(n))]) - n * muA * muB) / (
            (n - 1) * stdA * stdB
        )
    except ZeroDivisionError as e:
        print("WARNING: ", e)
        return 0.0


def spearman(A, B):
    """
    Calculate spearman correlation coefficient
    (pearson correlation of the ranked data)
    """
    return pearson(rankdata(A), rankdata(B))
columndescription2=[], +): + """ + Create printout which is printed to stdout and file. + """ + calculate.sort(key=lambda x: int(x.id)) + if not any( + [ + len(i) == len(columncall) + for i in (columnheader, columndescription, columnformat) + ] + ): + print("Lists of uneqal length!") + collength = [] + columnheaderprint = [] + columndescriptionprint = [] + columndescriptionprint2 = [] + if not columndescription2: + columndescription2 = ["" for _ in range(len(columncall))] + # split on "[" eg. COSMORS[B97-3c/def2-TZVP] + + for i in range(len(columndescription)): + if "[" in columndescription[i] and columndescription[i] not in ( + "[Eh]", + "[kcal/mol]", + "[a.u.]", + ): + columndescription2[i] = "[" + str(columndescription[i]).split("[")[1] + columndescription[i] = str(columndescription[i]).split("[")[0] + try: + for j in range(len(columncall)): + if columnformat[j]: + collength.append( + max( + [ + len(str(f"{i:{columnformat[j][0]}.{columnformat[j][1]}f}")) + for i in map(columncall[j], calculate) + ] + ) + ) + else: + collength.append(max([len(i) for i in map(columncall[j], calculate)])) + if ( + max( + len(i) + for i in [ + columndescription[j], + columnheader[j], + columndescription2[j], + ] + ) + > collength[j] + ): + collength[j] = max( + len(i) + for i in [ + columndescription[j], + columnheader[j], + columndescription2[j], + ] + ) + except (ValueError, TypeError) as e: + print(f"\n\nERRROR {e}") + for j in range(len(columncall)): + collength.append(12) + + for i in range(len(columncall)): + columnheaderprint.append(f"{columnheader[i]:>{collength[i]}}") + columndescriptionprint.append(f"{columndescription[i]:>{collength[i]}}") + if columndescription2: + columndescriptionprint2.append(f"{columndescription2[i]:>{collength[i]}}") + with open(outputpath, "w", newline=None) as out: + line = " ".join(columnheaderprint) + print(line) + out.write(line + "\n") + line = " ".join(columndescriptionprint) + print(line) + out.write(line + "\n") + if columndescription2: + 
line = " ".join(columndescriptionprint2) + print(line) + out.write(line + "\n") + for conf in calculate: + columncallprint = [] + for i in range(len(columncall)): + if columnformat[i]: + columncallprint.append( + f"{columncall[i](conf):{collength[i]}.{columnformat[i][1]}f}" + ) + else: + columncallprint.append(f"{columncall[i](conf):{collength[i]}}") + if conf.free_energy != minfree: + line = " ".join(columncallprint) + print(line) + out.write(line + "\n") + else: + line = " ".join(columncallprint + [f" <------"]) + print(line) + out.write(line + "\n") + + +def crest_routine(config, conformers, func, store_confs, prev_calculated=[]): + """ + check if two conformers are rotamers of each other, + this check is always performed, but removing conformers depends on crestcheck + returns conformers + returns store_confs + returns prev_calculated + """ + dirn = "conformer_rotamer_check" ### directory name + fn = "conformers.xyz" ### file name + + print("\nChecking for identical structures in ensemble with CREGEN!\n") + # create folder for comparison + if not os.path.isdir(os.path.join(config.cwd, dirn)): + mkdir_p(os.path.join(config.cwd, dirn)) + # delete conformers.xyz file if it already exists + if os.path.isfile(os.path.join(config.cwd, dirn, fn)): + os.remove(os.path.join(config.cwd, dirn, fn)) + # delete coord file if exists + if os.path.isfile(os.path.join(config.cwd, dirn, "coord")): + os.remove(os.path.join(config.cwd, dirn, "coord")) + + allconfs = deepcopy(conformers) + allconfs.extend(deepcopy(prev_calculated)) + + ### sort conformers according to energy of optimization + allconfs.sort(key=lambda conf: float(getattr(conf, "optimization_info")["energy"])) + # write coord: + try: + shutil.copy( + os.path.join(config.cwd, "CONF" + str(allconfs[0].id), func, "coord"), + os.path.join(config.cwd, dirn, "coord"), + ) + except Exception as e: + print(f"ERROR: {e}") + + # write conformers.xyz file + with open( + os.path.join(config.cwd, dirn, fn), "w", encoding=CODING, 
newline=None + ) as out: + for conf in allconfs: + conf_xyz, nat = t2x(os.path.join(config.cwd, "CONF" + str(conf.id), func)) + out.write(" {}\n".format(nat)) ### number of atoms + out.write( + "{:20.8f} !{}\n".format( + getattr(conf, "optimization_info")["energy"], "CONF" + str(conf.id) + ) + ) + for line in conf_xyz: + out.write(line + "\n") + for conf in allconfs: + conf_xyz, nat = t2x(os.path.join(config.cwd, "CONF" + str(conf.id), func)) + out.write(" {}\n".format(nat)) ### number of atoms + out.write( + "{:20.8f} !{}\n".format( + getattr(conf, "optimization_info")["energy"], "CONF" + str(conf.id) + ) + ) + for line in conf_xyz: + out.write(line + "\n") + time.sleep(0.01) + + crestcall = [ + config.external_paths["crestpath"], + "coord", + "-cregen", + fn, + "-ethr", + "0.15", + "-rthr", + "0.175", + "-bthr", + "0.03", + "-enso", + ] + + with open( + os.path.join(config.cwd, dirn, "crest.out"), "w", newline=None, encoding=CODING + ) as outputfile: + subprocess.call( + crestcall, + shell=False, + stdin=None, + stderr=subprocess.STDOUT, + universal_newlines=False, + cwd=os.path.join(config.cwd, dirn), + stdout=outputfile, + env=ENVIRON, + ) + time.sleep(0.05) + try: + with open( + os.path.join(config.cwd, dirn, "enso.tags"), + "r", + encoding=CODING, + newline=None, + ) as inp: + store = inp.readlines() + except (Exception) as e: + print(f"ERROR: {e}") + print("ERROR: output file (enso.tags) of CREST routine does not exist!") + keep = [] + if config.crestcheck: + try: + for line in store: + keep.append(line.split()[1][1:]) + for conf in list(conformers): + if "CONF" + str(conf.id) not in keep: + conf.optimization_info["info"] = "calculated" + conf.optimization_info["cregen_sort"] = "removed" + print( + f"!!!! Removing CONF{conf.id} because it is sorted " + "out by CREGEN." 
def format_line(key, value, options, optionlength=70, dist_to_options=30):
    """
    used in print_parameters

    - key [str] name printed in front of the ':'
    - value printed with str(), padded so that the '#' columns line up
    - options possible settings, printed behind the '#'
    - optionlength [int] if str(options) is longer than this, only the
                         leading items are kept and '...' is appended
    - dist_to_options [int] width reserved for key plus value

    returns the formatted line (ends with a newline)
    """
    # limit printout of possibilities
    if len(str(options)) > optionlength:
        length = 0
        reduced = []
        for item in options:
            # str() so that non-string options (numbers, bools) work as well
            length += len(str(item)) + 2
            if length < optionlength:
                reduced.append(item)
        reduced.append("...")
        options = reduced
    # a key longer than dist_to_options must not produce a negative width
    width = max(dist_to_options - len(key), 1)
    line = "{}: {:{digits}} # {} \n".format(key, str(value), options, digits=width)
    return line
def isclose(value_a, value_b, rel_tol=1e-9, abs_tol=0.0):
    """
    Replace function if not available from math module (exists since python 3.5)

    returns True if the difference of both values is within the larger of
    abs_tol and rel_tol times the larger absolute value.
    """
    tolerance = max(rel_tol * max(abs(value_a), abs(value_b)), abs_tol)
    return abs(value_a - value_b) <= tolerance


def calc_std_dev(data):
    """
    Calculate standard deviation (sample standard deviation, n - 1)

    returns 0.0 for less than two data points
    """
    n = len(data)
    if n < 2:
        # not defined for a single point, avoid the division by zero
        return 0.0
    mean = sum(data) / n
    variance = sum([(x - mean) ** 2 for x in data]) / (n - 1)
    return math.sqrt(variance)


def calc_weighted_std_dev(data, weights=None):
    """
    Calculate weighted standard deviation

    - data [list] values
    - weights [list] one weight per value; if missing or shorter than data
                     every value gets the weight 1.0

    returns 0.0 for empty data or if less than two weights are non-zero
    """
    n = len(data)
    if n == 0:
        return 0.0
    if not weights or len(weights) < n:
        weights = [1.0 for _ in range(n)]
    # number of non-zero weights
    m = sum(1 for weight in weights if weight != 0.0)
    if m < 2:
        # the normalisation (m - 1) / m would be zero
        return 0.0
    w_mean = sum([data[i] * weights[i] for i in range(n)]) / sum(weights)
    variance = sum([weights[i] * (data[i] - w_mean) ** 2 for i in range(n)]) / (
        (m - 1) * sum(weights) / m
    )
    return math.sqrt(variance)
"ch2cl2": 32.31627083333333, + "dmso": 32.303862816666665, + "h2o": 32.30345545833333, + "methanol": 32.3130819, + "thf": 32.306951225, + "toluene": 32.29417180833333, + }, + }, + "b97-3c": { + "tpss": { + "gas": 32.099305599999994, + "acetone": 32.07685382499999, + "chcl3": 32.078372550000005, + "acetonitrile": 32.067920741666676, + "ch2cl2": 32.0876576, + "dmso": 32.07713496666667, + "h2o": 32.07222951666666, + "methanol": 32.085467083333334, + "thf": 32.07950451666667, + "toluene": 32.06162065, + }, + "pbe0": { + "gas": 31.869211950000004, + "acetone": 31.83879448333333, + "chcl3": 31.845031441666663, + "acetonitrile": 31.829924375, + "ch2cl2": 31.855811533333338, + "dmso": 31.835178675000005, + "h2o": 31.83680665833334, + "methanol": 31.850090208333338, + "thf": 31.841073758333337, + "toluene": 31.824697675, + }, + "pbeh-3c": { + "gas": 32.37107341666667, + "acetone": 32.341934458333334, + "chcl3": 32.34503841666666, + "acetonitrile": 32.332714675, + "ch2cl2": 32.35537393333334, + "dmso": 32.34058045833333, + "h2o": 32.338073200000004, + "methanol": 32.35207416666667, + "thf": 32.34418670833334, + "toluene": 32.32693729166667, + }, + }, + "tpss": { + "tpss": { + "gas": 31.86774000000001, + "acetone": 31.848927016666664, + "chcl3": 31.851003891666664, + "acetonitrile": 31.843538541666664, + "ch2cl2": 31.860415141666664, + "dmso": 31.849057266666673, + "h2o": 31.844762508333332, + "methanol": 31.857667625, + "thf": 31.851878716666665, + "toluene": 31.833541825, + }, + "pbe0": { + "gas": 31.636587116666664, + "acetone": 31.60924136666667, + "chcl3": 31.616506625, + "acetonitrile": 31.604173191666664, + "ch2cl2": 31.62743169166667, + "dmso": 31.604975658333334, + "h2o": 31.607992624999994, + "methanol": 31.620864658333335, + "thf": 31.611675816666665, + "toluene": 31.59546233333333, + }, + "pbeh-3c": { + "gas": 32.14311896666666, + "acetone": 32.11710325, + "chcl3": 32.12106585833333, + "acetonitrile": 32.11156126666667, + "ch2cl2": 32.1315459, + "dmso": 
32.114840533333336, + "h2o": 32.11376850833333, + "methanol": 32.127508733333336, + "thf": 32.11950190833333, + "toluene": 32.1023676, + }, + }, + } + } + h_orca_shieldings = { + "TMS": { + "pbeh-3c": { + "tpss": { + "gas": 32.17000000000001, + "acetone": 32.09433333333334, + "chcl3": 32.10649999999999, + "acetonitrile": 32.09366666666667, + "ch2cl2": 32.099, + "dmso": 32.09466666666666, + "h2o": 32.10341666666666, + "methanol": 32.09250000000001, + "thf": 32.10183333333333, + "toluene": 32.122833333333325, + }, + "pbe0": { + "gas": 31.819000000000003, + "acetone": 31.732666666666663, + "chcl3": 31.747000000000003, + "acetonitrile": 31.73166666666667, + "ch2cl2": 31.738416666666666, + "dmso": 31.732666666666663, + "h2o": 31.741500000000002, + "methanol": 31.73066666666666, + "thf": 31.74116666666667, + "toluene": 31.765999999999995, + }, + "dsd-blyp": { + "gas": 31.91416666666667, + "acetone": 31.83541666666667, + "chcl3": 31.84766666666667, + "acetonitrile": 31.834666666666667, + "ch2cl2": 31.839916666666667, + "dmso": 31.835583333333332, + "h2o": 31.844166666666666, + "methanol": 31.833166666666667, + "thf": 31.842583333333334, + "toluene": 31.86475, + }, + "wb97x": { + "gas": 31.952, + "acetone": 31.867499999999996, + "chcl3": 31.880999999999997, + "acetonitrile": 31.866666666666664, + "ch2cl2": 31.872666666666664, + "dmso": 31.86758333333333, + "h2o": 31.876083333333337, + "methanol": 31.86533333333333, + "thf": 31.8755, + "toluene": 31.89966666666666, + }, + "pbeh-3c": { + "gas": 32.324999999999996, + "acetone": 32.23866666666667, + "chcl3": 32.25299999999999, + "acetonitrile": 32.23783333333333, + "ch2cl2": 32.24466666666667, + "dmso": 32.23866666666667, + "h2o": 32.24733333333333, + "methanol": 32.23666666666667, + "thf": 32.24733333333333, + "toluene": 32.272, + }, + "kt2": { + "gas": 31.817999999999998, + "acetone": 31.73233333333333, + "chcl3": 31.746333333333336, + "acetonitrile": 31.73133333333333, + "ch2cl2": 31.737666666666666, + "dmso": 
31.73233333333333, + "h2o": 31.740666666666666, + "methanol": 31.73, + "thf": 31.740499999999994, + "toluene": 31.765666666666664, + }, + }, + "b97-3c": { + "tpss": { + "gas": 32.21800000000001, + "acetone": 32.140166666666666, + "chcl3": 32.152166666666666, + "acetonitrile": 32.140499999999996, + "ch2cl2": 32.145, + "dmso": 32.14183333333333, + "h2o": 32.175000000000004, + "methanol": 32.13766666666667, + "thf": 32.148, + "toluene": 32.168833333333325, + }, + "pbe0": { + "gas": 31.868, + "acetone": 31.778999999999996, + "chcl3": 31.792583333333337, + "acetonitrile": 31.778666666666663, + "ch2cl2": 31.784333333333336, + "dmso": 31.78016666666667, + "h2o": 31.815166666666666, + "methanol": 31.77633333333333, + "thf": 31.787500000000005, + "toluene": 31.812, + }, + "dsd-blyp": { + "gas": 31.962999999999997, + "acetone": 31.881250000000005, + "chcl3": 31.89325, + "acetonitrile": 31.881583333333335, + "ch2cl2": 31.886000000000006, + "dmso": 31.882583333333333, + "h2o": 31.916833333333333, + "methanol": 31.878500000000003, + "thf": 31.889, + "toluene": 31.910750000000004, + }, + "wb97x": { + "gas": 32.00091666666666, + "acetone": 31.913416666666663, + "chcl3": 31.9265, + "acetonitrile": 31.9135, + "ch2cl2": 31.918499999999995, + "dmso": 31.914666666666665, + "h2o": 31.94883333333333, + "methanol": 31.910666666666668, + "thf": 31.921500000000005, + "toluene": 31.94516666666667, + }, + "pbeh-3c": { + "gas": 32.373, + "acetone": 32.28366666666667, + "chcl3": 32.29716666666666, + "acetonitrile": 32.28333333333333, + "ch2cl2": 32.288666666666664, + "dmso": 32.284499999999994, + "h2o": 32.317166666666665, + "methanol": 32.28066666666667, + "thf": 32.29183333333334, + "toluene": 32.31616666666667, + }, + "kt2": { + "gas": 31.868, + "acetone": 31.778666666666663, + "chcl3": 31.792500000000004, + "acetonitrile": 31.778666666666663, + "ch2cl2": 31.784333333333336, + "dmso": 31.78033333333333, + "h2o": 31.794583333333332, + "methanol": 31.77633333333333, + "thf": 
31.787500000000005, + "toluene": 31.812, + }, + }, + "tpss": { + "tpss": { + "gas": 31.97300000000001, + "acetone": 31.898, + "chcl3": 31.909500000000005, + "acetonitrile": 31.897833333333338, + "ch2cl2": 31.902666666666665, + "dmso": 31.898999999999997, + "h2o": 31.910666666666668, + "methanol": 31.89566666666667, + "thf": 31.90516666666667, + "toluene": 31.925, + }, + "pbe0": { + "gas": 31.625, + "acetone": 31.537166666666668, + "chcl3": 31.550499999999996, + "acetonitrile": 31.536666666666665, + "ch2cl2": 31.542500000000004, + "dmso": 31.537666666666667, + "h2o": 31.549500000000005, + "methanol": 31.53458333333334, + "thf": 31.545499999999993, + "toluene": 31.569, + }, + "dsd-blyp": { + "gas": 31.718000000000004, + "acetone": 31.639666666666667, + "chcl3": 31.651416666666663, + "acetonitrile": 31.639499999999998, + "ch2cl2": 31.644083333333338, + "dmso": 31.640416666666667, + "h2o": 31.65216666666667, + "methanol": 31.636916666666664, + "thf": 31.64683333333333, + "toluene": 31.667833333333334, + }, + "wb97x": { + "gas": 31.757, + "acetone": 31.672250000000002, + "chcl3": 31.68516666666667, + "acetonitrile": 31.67166666666667, + "ch2cl2": 31.6775, + "dmso": 31.67266666666666, + "h2o": 31.68466666666666, + "methanol": 31.66966666666667, + "thf": 31.680166666666665, + "toluene": 31.703, + }, + "pbeh-3c": { + "gas": 32.13400000000001, + "acetone": 32.047333333333334, + "chcl3": 32.06066666666667, + "acetonitrile": 32.04666666666666, + "ch2cl2": 32.05266666666666, + "dmso": 32.047666666666665, + "h2o": 32.059, + "methanol": 32.044666666666664, + "thf": 32.05566666666666, + "toluene": 32.079, + }, + "kt2": { + "gas": 31.622999999999994, + "acetone": 31.536666666666665, + "chcl3": 31.55, + "acetonitrile": 31.5365, + "ch2cl2": 31.54183333333333, + "dmso": 31.537666666666667, + "h2o": 31.548666666666666, + "methanol": 31.533833333333334, + "thf": 31.544833333333333, + "toluene": 31.56866666666667, + }, + }, + } + } + c_tm_shieldings = { + "TMS": { + "pbeh-3c": { + 
"tpss": { + "gas": 186.6465687, + "acetone": 187.27903107499998, + "chcl3": 187.238498325, + "acetonitrile": 187.372512775, + "ch2cl2": 187.0771589, + "dmso": 187.243299225, + "h2o": 187.37157565, + "methanol": 187.10988087500002, + "thf": 187.19458635, + "toluene": 187.48276635, + }, + "pbe0": { + "gas": 188.859355325, + "acetone": 189.6196798, + "chcl3": 189.4971041, + "acetonitrile": 189.698041075, + "ch2cl2": 189.318608125, + "dmso": 189.68253637499998, + "h2o": 189.65553119999998, + "methanol": 189.409198575, + "thf": 189.55889105, + "toluene": 189.776394325, + }, + "pbeh-3c": { + "gas": 198.41611147499998, + "acetone": 199.13367970000002, + "chcl3": 199.054179875, + "acetonitrile": 199.250248325, + "ch2cl2": 198.845265825, + "dmso": 199.185056825, + "h2o": 199.2289907, + "methanol": 198.917945675, + "thf": 199.076003325, + "toluene": 199.3931504, + }, + }, + "b97-3c": { + "tpss": { + "gas": 186.97419324999998, + "acetone": 187.496073025, + "chcl3": 187.45393565, + "acetonitrile": 187.554538075, + "ch2cl2": 187.31238564999998, + "dmso": 187.469466275, + "h2o": 187.57139320000002, + "methanol": 187.344972675, + "thf": 187.42200885, + "toluene": 187.671731225, + }, + "pbe0": { + "gas": 189.169130675, + "acetone": 189.816064175, + "chcl3": 189.69082477499998, + "acetonitrile": 189.860330875, + "ch2cl2": 189.532330975, + "dmso": 189.88587445000002, + "h2o": 189.8368566, + "methanol": 189.62332455, + "thf": 189.76569125, + "toluene": 189.94371412499999, + }, + "pbeh-3c": { + "gas": 198.7168509, + "acetone": 199.3308802, + "chcl3": 199.25125382500002, + "acetonitrile": 199.41320919999998, + "ch2cl2": 199.06108425, + "dmso": 199.390014125, + "h2o": 199.41478467500002, + "methanol": 199.13192775, + "thf": 199.28161922500001, + "toluene": 199.562540575, + }, + }, + "tpss": { + "tpss": { + "gas": 185.410099625, + "acetone": 185.99193982499997, + "chcl3": 185.949648475, + "acetonitrile": 186.0799505, + "ch2cl2": 185.80363820000002, + "dmso": 185.97415155, + "h2o": 
186.07484635, + "methanol": 185.839592875, + "thf": 185.9190184, + "toluene": 186.17204557500003, + }, + "pbe0": { + "gas": 187.626469575, + "acetone": 188.34549135, + "chcl3": 188.212218325, + "acetonitrile": 188.413268225, + "ch2cl2": 188.04820440000003, + "dmso": 188.42875420000001, + "h2o": 188.3724699, + "methanol": 188.14698049999998, + "thf": 188.2963985, + "toluene": 188.46803717499998, + }, + "pbeh-3c": { + "gas": 197.27823677499998, + "acetone": 197.953274625, + "chcl3": 197.871683925, + "acetonitrile": 198.0615831, + "ch2cl2": 197.6764831, + "dmso": 198.014841225, + "h2o": 198.048432475, + "methanol": 197.75143105, + "thf": 197.905333025, + "toluene": 198.186480775, + }, + }, + } + } + c_orca_shieldings = { + "TMS": { + "pbeh-3c": { + "tpss": { + "gas": 188.604, + "acetone": 189.7395, + "chcl3": 189.5435, + "acetonitrile": 189.77, + "ch2cl2": 189.6625, + "dmso": 189.8015, + "h2o": 189.8495, + "methanol": 189.77, + "thf": 189.647, + "toluene": 189.30400000000003, + }, + "pbe0": { + "gas": 188.867, + "acetone": 190.265, + "chcl3": 190.02224999999999, + "acetonitrile": 190.298, + "ch2cl2": 190.16649999999998, + "dmso": 190.33175, + "h2o": 190.38799999999998, + "methanol": 190.29875, + "thf": 190.1445, + "toluene": 189.73375, + }, + "dsd-blyp": { + "gas": 191.37099999999998, + "acetone": 192.606, + "chcl3": 192.385, + "acetonitrile": 192.63599999999997, + "ch2cl2": 192.51575000000003, + "dmso": 192.66625000000002, + "h2o": 192.7205, + "methanol": 192.63524999999998, + "thf": 192.4955, + "toluene": 192.12275, + }, + "wb97x": { + "gas": 190.36075, + "acetone": 191.689, + "chcl3": 191.453, + "acetonitrile": 191.72175000000001, + "ch2cl2": 191.5935, + "dmso": 191.753, + "h2o": 191.8085, + "methanol": 191.72150000000002, + "thf": 191.57150000000001, + "toluene": 191.17225, + }, + "pbeh-3c": { + "gas": 198.458, + "acetone": 199.905, + "chcl3": 199.649, + "acetonitrile": 199.94, + "ch2cl2": 199.8025, + "dmso": 199.9715, + "h2o": 200.0265, + "methanol": 199.93900, + 
"thf": 199.7775, + "toluene": 199.3395, + }, + "kt2": { + "gas": 190.719, + "acetone": 191.988, + "chcl3": 191.7645, + "acetonitrile": 192.019, + "ch2cl2": 191.8965, + "dmso": 192.05150000000003, + "h2o": 192.1055, + "methanol": 192.02, + "thf": 191.8775, + "toluene": 191.4905, + }, + }, + "b97-3c": { + "tpss": { + "gas": 188.908, + "acetone": 190.0265, + "chcl3": 189.83749999999998, + "acetonitrile": 190.062, + "ch2cl2": 189.954, + "dmso": 190.103, + "h2o": 190.07774999999998, + "methanol": 190.0595, + "thf": 189.9445, + "toluene": 189.614, + }, + "pbe0": { + "gas": 189.18025, + "acetone": 190.57025000000002, + "chcl3": 190.33075, + "acetonitrile": 190.60525, + "ch2cl2": 190.47, + "dmso": 190.65175, + "h2o": 190.59925000000004, + "methanol": 190.60775, + "thf": 190.456, + "toluene": 190.058, + }, + "dsd-blyp": { + "gas": 191.66199999999998, + "acetone": 192.88025, + "chcl3": 192.66174999999998, + "acetonitrile": 192.915, + "ch2cl2": 192.79025, + "dmso": 192.95425, + "h2o": 192.91275000000002, + "methanol": 192.91250000000002, + "thf": 192.77625, + "toluene": 192.4135, + }, + "wb97x": { + "gas": 190.65525, + "acetone": 191.97199999999998, + "chcl3": 191.73825, + "acetonitrile": 192.00725, + "ch2cl2": 191.875, + "dmso": 192.04950000000002, + "h2o": 191.99675000000002, + "methanol": 192.007, + "thf": 191.86025, + "toluene": 191.47125, + }, + "pbeh-3c": { + "gas": 198.752, + "acetone": 200.196, + "chcl3": 199.9445, + "acetonitrile": 200.23250000000002, + "ch2cl2": 200.0925, + "dmso": 200.277, + "h2o": 200.15925, + "methanol": 200.23350000000002, + "thf": 200.075, + "toluene": 199.65050000000002, + }, + "kt2": { + "gas": 191.037, + "acetone": 192.29649999999998, + "chcl3": 192.0765, + "acetonitrile": 192.3275, + "ch2cl2": 192.20350000000002, + "dmso": 192.3755, + "h2o": 192.188, + "methanol": 192.33275, + "thf": 192.1925, + "toluene": 191.8175, + }, + }, + "tpss": { + "tpss": { + "gas": 187.22, + "acetone": 188.442, + "chcl3": 188.214, + "acetonitrile": 188.4745, + 
"ch2cl2": 188.351, + "dmso": 188.5115, + "h2o": 188.58350000000002, + "methanol": 188.473, + "thf": 188.33950000000002, + "toluene": 187.965, + }, + "pbe0": { + "gas": 187.5725, + "acetone": 188.99225, + "chcl3": 188.73424999999997, + "acetonitrile": 189.0295, + "ch2cl2": 188.8875, + "dmso": 189.06875, + "h2o": 189.14175, + "methanol": 189.0275, + "thf": 188.8665, + "toluene": 188.4305, + }, + "dsd-blyp": { + "gas": 190.06825, + "acetone": 191.39, + "chcl3": 191.15425, + "acetonitrile": 191.42600000000002, + "ch2cl2": 191.29475000000002, + "dmso": 191.461, + "h2o": 191.53225, + "methanol": 191.4225, + "thf": 191.27499999999998, + "toluene": 190.87675000000002, + }, + "wb97x": { + "gas": 189.04575, + "acetone": 190.45225000000002, + "chcl3": 190.20074999999997, + "acetonitrile": 190.4885, + "ch2cl2": 190.35025000000002, + "dmso": 190.52525, + "h2o": 190.5975, + "methanol": 190.4855, + "thf": 190.32899999999998, + "toluene": 189.904, + }, + "pbeh-3c": { + "gas": 197.184, + "acetone": 198.7195, + "chcl3": 198.449, + "acetonitrile": 198.75799999999998, + "ch2cl2": 198.611, + "dmso": 198.7955, + "h2o": 198.8655, + "methanol": 198.755, + "thf": 198.587, + "toluene": 198.1245, + }, + "kt2": { + "gas": 189.386, + "acetone": 190.7245, + "chcl3": 190.488, + "acetonitrile": 190.7585, + "ch2cl2": 190.6275, + "dmso": 190.7975, + "h2o": 190.87900000000002, + "methanol": 190.75799999999998, + "thf": 190.6095, + "toluene": 190.2095, + }, + }, + } + } + f_tm_shieldings = { + "CFCl3": { + "pbeh-3c": { + "tpss": { + "gas": 163.5665883, + "acetone": 165.9168679, + "chcl3": 165.043061, + "acetonitrile": 166.377831, + "ch2cl2": 164.776383, + "dmso": 166.1839641, + "h2o": 166.880495, + "methanol": 165.4364879, + "thf": 165.7384153, + "toluene": 165.7812123, + }, + "pbe0": { + "gas": 179.4820255, + "acetone": 181.9743764, + "chcl3": 181.1338758, + "acetonitrile": 182.4438224, + "ch2cl2": 180.8751895, + "dmso": 182.2224636, + "h2o": 182.9958356, + "methanol": 181.5031528, + "thf": 
181.7669891, + "toluene": 181.7963177, + }, + "pbeh-3c": { + "gas": 225.045234, + "acetone": 226.6335916, + "chcl3": 226.0133192, + "acetonitrile": 226.9371636, + "ch2cl2": 225.8300352, + "dmso": 226.8061873, + "h2o": 227.4000142, + "methanol": 226.3012569, + "thf": 226.5247654, + "toluene": 226.555523, + }, + }, + "b97-3c": { + "tpss": { + "gas": 150.4514566, + "acetone": 151.5612999, + "chcl3": 150.5819485, + "acetonitrile": 151.9884593, + "ch2cl2": 150.2953968, + "dmso": 151.8818575, + "h2o": 151.6179136, + "methanol": 151.0439011, + "thf": 151.4207377, + "toluene": 151.4686522, + }, + "pbe0": { + "gas": 167.7783433, + "acetone": 169.09491, + "chcl3": 168.1354478, + "acetonitrile": 169.5416871, + "ch2cl2": 167.8558489, + "dmso": 169.3950732, + "h2o": 169.2178304, + "methanol": 168.5860848, + "thf": 168.9136991, + "toluene": 168.9347931, + }, + "pbeh-3c": { + "gas": 213.6651892, + "acetone": 214.1284506, + "chcl3": 213.4293417, + "acetonitrile": 214.4297108, + "ch2cl2": 213.2298905, + "dmso": 214.366451, + "h2o": 214.1162368, + "methanol": 213.76845, + "thf": 214.0512078, + "toluene": 214.0924969, + }, + }, + "tpss": { + "tpss": { + "gas": 146.4091676, + "acetone": 148.7113398, + "chcl3": 147.7715256, + "acetonitrile": 149.1854535, + "ch2cl2": 147.4708159, + "dmso": 148.9781692, + "h2o": 148.8407317, + "methanol": 148.1815132, + "thf": 148.5140784, + "toluene": 148.6001306, + }, + "pbe0": { + "gas": 163.4654205, + "acetone": 165.9356023, + "chcl3": 165.0269644, + "acetonitrile": 166.4188044, + "ch2cl2": 164.7336009, + "dmso": 166.1830401, + "h2o": 166.0858984, + "methanol": 165.4145633, + "thf": 165.7038144, + "toluene": 165.7726604, + }, + "pbeh-3c": { + "gas": 209.8752809, + "acetone": 211.4025693, + "chcl3": 210.7286529, + "acetonitrile": 211.7120494, + "ch2cl2": 210.5166504, + "dmso": 211.5990015, + "h2o": 211.4250312, + "methanol": 211.0321396, + "thf": 211.2798891, + "toluene": 211.3499046, + }, + }, + } + } + f_orca_shieldings = { + "CFCl3": { + "pbeh-3c": 
{ + "tpss": { + "gas": 166.028, + "acetone": 167.858, + "chcl3": 167.569, + "acetonitrile": 167.92, + "ch2cl2": 167.732, + "dmso": 167.992, + "h2o": 168.239, + "methanol": 167.889, + "thf": 167.737, + "toluene": 167.278, + }, + "pbe0": { + "gas": 178.99, + "acetone": 181.086, + "chcl3": 180.741, + "acetonitrile": 181.154, + "ch2cl2": 180.939, + "dmso": 181.224, + "h2o": 181.464, + "methanol": 181.123, + "thf": 180.934, + "toluene": 180.377, + }, + "dsd-blyp": { + "gas": 225.542, + "acetone": 227.877, + "chcl3": 227.478, + "acetonitrile": 227.949, + "ch2cl2": 227.712, + "dmso": 228.007, + "h2o": 228.213, + "methanol": 227.919, + "thf": 227.691, + "toluene": 227.033, + }, + "wb97x": { + "gas": 193.433, + "acetone": 195.381, + "chcl3": 195.059, + "acetonitrile": 195.445, + "ch2cl2": 195.245, + "dmso": 195.508, + "h2o": 195.733, + "methanol": 195.415, + "thf": 195.239, + "toluene": 194.719, + }, + "pbeh-3c": { + "gas": 224.834, + "acetone": 226.308, + "chcl3": 226.076, + "acetonitrile": 226.36, + "ch2cl2": 226.207, + "dmso": 226.424, + "h2o": 226.639, + "methanol": 226.333, + "thf": 226.215, + "toluene": 225.843, + }, + "kt2": { + "gas": 144.178, + "acetone": 146.15, + "chcl3": 145.821, + "acetonitrile": 146.219, + "ch2cl2": 146.007, + "dmso": 146.298, + "h2o": 146.569, + "methanol": 146.185, + "thf": 146.012, + "toluene": 145.488, + }, + }, + "b97-3c": { + "tpss": { + "gas": 153.325, + "acetone": 153.259, + "chcl3": 152.987, + "acetonitrile": 153.326, + "ch2cl2": 153.137, + "dmso": 153.425, + "h2o": 153.729, + "methanol": 153.292, + "thf": 153.16, + "toluene": 152.731, + }, + "pbe0": { + "gas": 167.245, + "acetone": 167.447, + "chcl3": 167.121, + "acetonitrile": 167.52, + "ch2cl2": 167.31, + "dmso": 167.626, + "h2o": 167.92, + "methanol": 167.486, + "thf": 167.322, + "toluene": 166.785, + }, + "dsd-blyp": { + "gas": 216.287, + "acetone": 217.144, + "chcl3": 216.726, + "acetonitrile": 217.223, + "ch2cl2": 216.969, + "dmso": 217.304, + "h2o": 217.555, + "methanol": 
217.19, + "thf": 216.957, + "toluene": 216.272, + }, + "wb97x": { + "gas": 182.767, + "acetone": 182.921, + "chcl3": 182.602, + "acetonitrile": 182.99, + "ch2cl2": 182.783, + "dmso": 183.077, + "h2o": 183.351, + "methanol": 182.957, + "thf": 182.792, + "toluene": 182.279, + }, + "pbeh-3c": { + "gas": 213.421, + "acetone": 213.215, + "chcl3": 212.997, + "acetonitrile": 213.271, + "ch2cl2": 213.116, + "dmso": 213.36, + "h2o": 213.627, + "methanol": 213.241, + "thf": 213.14, + "toluene": 212.796, + }, + "kt2": { + "gas": 130.539, + "acetone": 130.291, + "chcl3": 130.081, + "acetonitrile": 130.364, + "ch2cl2": 130.242, + "dmso": 130.472, + "h2o": 130.803, + "methanol": 130.326, + "thf": 130.267, + "toluene": 129.808, + }, + }, + "tpss": { + "tpss": { + "gas": 148.387, + "acetone": 149.573, + "chcl3": 149.247, + "acetonitrile": 149.647, + "ch2cl2": 149.43, + "dmso": 149.748, + "h2o": 150.066, + "methanol": 149.609, + "thf": 149.446, + "toluene": 148.927, + }, + "pbe0": { + "gas": 162.075, + "acetone": 163.638, + "chcl3": 163.239, + "acetonitrile": 163.71, + "ch2cl2": 163.472, + "dmso": 163.807, + "h2o": 164.125, + "methanol": 163.671, + "thf": 163.476, + "toluene": 162.835, + }, + "dsd-blyp": { + "gas": 211.635, + "acetone": 213.66, + "chcl3": 213.199, + "acetonitrile": 213.746, + "ch2cl2": 213.469, + "dmso": 213.828, + "h2o": 214.092, + "methanol": 213.71, + "thf": 213.451, + "toluene": 212.692, + }, + "wb97x": { + "gas": 177.986, + "acetone": 179.452, + "chcl3": 179.093, + "acetonitrile": 179.528, + "ch2cl2": 179.299, + "dmso": 179.616, + "h2o": 179.902, + "methanol": 179.491, + "thf": 179.302, + "toluene": 178.721, + }, + "pbeh-3c": { + "gas": 208.73, + "acetone": 209.687, + "chcl3": 209.429, + "acetonitrile": 209.749, + "ch2cl2": 209.573, + "dmso": 209.825, + "h2o": 210.102, + "methanol": 209.716, + "thf": 209.592, + "toluene": 209.176, + }, + "kt2": { + "gas": 124.897, + "acetone": 126.154, + "chcl3": 125.806, + "acetonitrile": 126.235, + "ch2cl2": 126.001, + 
"dmso": 126.345, + "h2o": 126.689, + "methanol": 126.193, + "thf": 126.019, + "toluene": 125.465, + }, + }, + } + } + p_tm_shieldings = { + "PH3": { + "pbeh-3c": { + "tpss": { + "gas": 560.9783608, + "acetone": 559.5567974, + "chcl3": 555.7297268, + "acetonitrile": 558.7420853, + "ch2cl2": 555.9207578, + "dmso": 559.0317956, + "h2o": 551.9868157, + "methanol": 557.7229598, + "thf": 559.4070044, + "toluene": 558.9538264, + }, + "pbe0": { + "gas": 573.7889709, + "acetone": 572.6807308, + "chcl3": 568.6200619, + "acetonitrile": 572.0156003, + "ch2cl2": 568.6775273, + "dmso": 572.2984368, + "h2o": 564.8512663, + "methanol": 570.6948985, + "thf": 572.4491708, + "toluene": 572.2945282, + }, + "pbeh-3c": { + "gas": 622.6149401, + "acetone": 624.221383, + "chcl3": 622.2460822, + "acetonitrile": 624.0839458, + "ch2cl2": 622.3660073, + "dmso": 623.8685076, + "h2o": 622.54767, + "methanol": 623.1569748, + "thf": 623.7253948, + "toluene": 623.2733775, + }, + }, + "b97-3c": { + "tpss": { + "gas": 559.5296772, + "acetone": 557.5438599, + "chcl3": 553.7653249, + "acetonitrile": 556.735552, + "ch2cl2": 554.1613395, + "dmso": 557.010476, + "h2o": 550.1185847, + "methanol": 555.82703, + "thf": 557.2207586, + "toluene": 556.8427805, + }, + "pbe0": { + "gas": 572.4232552, + "acetone": 570.7398164, + "chcl3": 566.7271447, + "acetonitrile": 570.0779914, + "ch2cl2": 566.9826221, + "dmso": 570.3456887, + "h2o": 563.05667, + "methanol": 568.8622417, + "thf": 570.3305746, + "toluene": 570.2507738, + }, + "pbeh-3c": { + "gas": 621.2286124, + "acetone": 622.356702, + "chcl3": 620.3365742, + "acetonitrile": 622.2263079, + "ch2cl2": 620.6570087, + "dmso": 621.9912341, + "h2o": 620.7021951, + "methanol": 621.3567408, + "thf": 621.7091401, + "toluene": 621.3088355, + }, + }, + "tpss": { + "tpss": { + "gas": 558.1589032, + "acetone": 556.5475548, + "chcl3": 553.3273579, + "acetonitrile": 555.6559443, + "ch2cl2": 553.600544, + "dmso": 556.0983125, + "h2o": 548.970911, + "methanol": 555.4535832, + 
"thf": 556.3191064, + "toluene": 555.9299261, + }, + "pbe0": { + "gas": 571.012794, + "acetone": 569.7250563, + "chcl3": 566.2936179, + "acetonitrile": 568.9923465, + "ch2cl2": 566.4237381, + "dmso": 569.4236946, + "h2o": 561.898531, + "methanol": 568.4989088, + "thf": 569.4140377, + "toluene": 569.3191735, + }, + "pbeh-3c": { + "gas": 620.0674752, + "acetone": 621.5116584, + "chcl3": 619.9397925, + "acetonitrile": 621.2898165, + "ch2cl2": 620.15928, + "dmso": 621.2154327, + "h2o": 619.7280828, + "methanol": 621.0126668, + "thf": 620.9449236, + "toluene": 620.5363442, + }, + }, + }, + "TMP": { + "pbeh-3c": { + "tpss": { + "gas": 281.6302978, + "acetone": 265.4354914, + "chcl3": 257.5409613, + "acetonitrile": 263.2430698, + "ch2cl2": 257.0543221, + "dmso": 262.8752182, + "h2o": 242.4838211, + "methanol": 245.6431135, + "thf": 266.7188352, + "toluene": 269.0597797, + }, + "pbe0": { + "gas": 277.8252556, + "acetone": 261.5502528, + "chcl3": 254.1109855, + "acetonitrile": 259.5059377, + "ch2cl2": 253.6358478, + "dmso": 258.7821425, + "h2o": 239.5329333, + "methanol": 242.1687948, + "thf": 262.8378646, + "toluene": 265.4050199, + }, + "pbeh-3c": { + "gas": 390.6073841, + "acetone": 378.6668397, + "chcl3": 373.2000393, + "acetonitrile": 377.1343123, + "ch2cl2": 372.9163524, + "dmso": 376.6203422, + "h2o": 362.7163813, + "methanol": 364.8220379, + "thf": 379.5051748, + "toluene": 381.2789752, + }, + }, + "b97-3c": { + "tpss": { + "gas": 276.8654211, + "acetone": 259.8829696, + "chcl3": 251.5648819, + "acetonitrile": 257.7225804, + "ch2cl2": 251.0880934, + "dmso": 256.90761, + "h2o": 234.4800595, + "methanol": 237.4630709, + "thf": 261.291204, + "toluene": 263.9827571, + }, + "pbe0": { + "gas": 273.0911933, + "acetone": 256.1507446, + "chcl3": 248.2072561, + "acetonitrile": 254.0571117, + "ch2cl2": 247.7513367, + "dmso": 253.0100842, + "h2o": 231.7425518, + "methanol": 234.1695454, + "thf": 257.4644157, + "toluene": 260.3717755, + }, + "pbeh-3c": { + "gas": 386.2437698, + 
"acetone": 373.8145109, + "chcl3": 368.1719462, + "acetonitrile": 372.350904, + "ch2cl2": 367.8934403, + "dmso": 371.4995766, + "h2o": 355.9965281, + "methanol": 358.0517851, + "thf": 374.7716841, + "toluene": 376.8283779, + }, + }, + "tpss": { + "tpss": { + "gas": 278.0447826, + "acetone": 261.4382678, + "chcl3": 253.5317417, + "acetonitrile": 259.5831076, + "ch2cl2": 253.0735218, + "dmso": 258.8205488, + "h2o": 236.9938311, + "methanol": 240.0596152, + "thf": 262.646474, + "toluene": 265.5482099, + }, + "pbe0": { + "gas": 274.1582231, + "acetone": 257.5976215, + "chcl3": 250.0455696, + "acetonitrile": 255.8739799, + "ch2cl2": 249.6032437, + "dmso": 254.7109046, + "h2o": 234.1066151, + "methanol": 236.6658834, + "thf": 258.6914971, + "toluene": 261.8410368, + }, + "pbeh-3c": { + "gas": 387.4697022, + "acetone": 375.2569197, + "chcl3": 369.9533245, + "acetonitrile": 374.0256406, + "ch2cl2": 369.6688695, + "dmso": 373.1520781, + "h2o": 358.1827766, + "methanol": 360.3168296, + "thf": 376.0015788, + "toluene": 378.3153047, + }, + }, + }, + } + p_orca_shieldings = { + "PH3": { + "pbeh-3c": { + "tpss": { + "gas": 578.49, + "acetone": 577.53, + "chcl3": 577.773, + "acetonitrile": 577.631, + "ch2cl2": 577.63, + "dmso": 577.688, + "h2o": 577.764, + "methanol": 577.506, + "thf": 577.671, + "toluene": 577.946, + }, + "pbe0": { + "gas": 573.639, + "acetone": 573.637, + "chcl3": 573.71, + "acetonitrile": 573.764, + "ch2cl2": 573.67, + "dmso": 573.829, + "h2o": 573.914, + "methanol": 573.632, + "thf": 573.688, + "toluene": 573.665, + }, + "dsd-blyp": { + "gas": 569.431, + "acetone": 567.575, + "chcl3": 567.994, + "acetonitrile": 567.65, + "ch2cl2": 567.746, + "dmso": 567.695, + "h2o": 567.745, + "methanol": 567.531, + "thf": 567.809, + "toluene": 568.372, + }, + "wb97x": { + "gas": 568.27, + "acetone": 568.185, + "chcl3": 568.261, + "acetonitrile": 568.31, + "ch2cl2": 568.218, + "dmso": 568.375, + "h2o": 568.459, + "methanol": 568.18, + "thf": 568.236, + "toluene": 568.231, + 
}, + "pbeh-3c": { + "gas": 622.505, + "acetone": 626.377, + "chcl3": 625.536, + "acetonitrile": 626.609, + "ch2cl2": 626.042, + "dmso": 626.709, + "h2o": 626.85, + "methanol": 626.48, + "thf": 625.933, + "toluene": 624.513, + }, + "kt2": { + "gas": 587.254, + "acetone": 587.821, + "chcl3": 587.78, + "acetonitrile": 587.962, + "ch2cl2": 587.81, + "dmso": 588.032, + "h2o": 588.129, + "methanol": 587.829, + "thf": 587.812, + "toluene": 587.606, + }, + }, + "b97-3c": { + "tpss": { + "gas": 574.673, + "acetone": 575.587, + "chcl3": 575.672, + "acetonitrile": 575.6, + "ch2cl2": 575.619, + "dmso": 575.662, + "h2o": 575.948, + "methanol": 575.57, + "thf": 575.668, + "toluene": 575.8, + }, + "pbe0": { + "gas": 569.721, + "acetone": 571.667, + "chcl3": 571.577, + "acetonitrile": 571.703, + "ch2cl2": 571.631, + "dmso": 571.774, + "h2o": 572.075, + "methanol": 571.67, + "thf": 571.656, + "toluene": 571.48, + }, + "dsd-blyp": { + "gas": 565.936, + "acetone": 565.88, + "chcl3": 566.179, + "acetonitrile": 565.866, + "ch2cl2": 566.012, + "dmso": 565.915, + "h2o": 566.166, + "methanol": 565.843, + "thf": 566.084, + "toluene": 566.506, + }, + "wb97x": { + "gas": 564.429, + "acetone": 566.244, + "chcl3": 566.161, + "acetonitrile": 566.279, + "ch2cl2": 566.206, + "dmso": 566.349, + "h2o": 566.646, + "methanol": 566.247, + "thf": 566.233, + "toluene": 566.086, + }, + "pbeh-3c": { + "gas": 618.99, + "acetone": 624.483, + "chcl3": 623.499, + "acetonitrile": 624.639, + "ch2cl2": 624.087, + "dmso": 624.744, + "h2o": 625.072, + "methanol": 624.593, + "thf": 623.983, + "toluene": 622.448, + }, + "kt2": { + "gas": 583.324, + "acetone": 585.797, + "chcl3": 585.592, + "acetonitrile": 585.848, + "ch2cl2": 585.715, + "dmso": 585.925, + "h2o": 586.235, + "methanol": 585.813, + "thf": 585.725, + "toluene": 585.371, + }, + }, + "tpss": { + "tpss": { + "gas": 574.839, + "acetone": 574.09, + "chcl3": 574.267, + "acetonitrile": 574.11, + "ch2cl2": 574.167, + "dmso": 574.166, + "h2o": 574.435, + 
"methanol": 574.084, + "thf": 574.22, + "toluene": 574.478, + }, + "pbe0": { + "gas": 569.911, + "acetone": 570.088, + "chcl3": 570.127, + "acetonitrile": 570.133, + "ch2cl2": 570.135, + "dmso": 570.198, + "h2o": 570.482, + "methanol": 570.103, + "thf": 570.164, + "toluene": 570.119, + }, + "dsd-blyp": { + "gas": 566.08, + "acetone": 564.411, + "chcl3": 564.793, + "acetonitrile": 564.406, + "ch2cl2": 564.583, + "dmso": 564.448, + "h2o": 564.684, + "methanol": 564.385, + "thf": 564.658, + "toluene": 565.213, + }, + "wb97x": { + "gas": 564.63, + "acetone": 564.706, + "chcl3": 564.726, + "acetonitrile": 564.75, + "ch2cl2": 564.72, + "dmso": 564.813, + "h2o": 565.093, + "methanol": 564.721, + "thf": 564.752, + "toluene": 564.742, + }, + "pbeh-3c": { + "gas": 619.182, + "acetone": 623.189, + "chcl3": 622.29, + "acetonitrile": 623.352, + "ch2cl2": 622.833, + "dmso": 623.451, + "h2o": 623.764, + "methanol": 623.308, + "thf": 622.734, + "toluene": 621.304, + }, + "kt2": { + "gas": 583.522, + "acetone": 584.278, + "chcl3": 584.168, + "acetonitrile": 584.337, + "ch2cl2": 584.241, + "dmso": 584.407, + "h2o": 584.701, + "methanol": 584.305, + "thf": 584.256, + "toluene": 584.034, + }, + }, + }, + "TMP": { + "pbeh-3c": { + "tpss": { + "gas": 291.33, + "acetone": 276.264, + "chcl3": 277.254, + "acetonitrile": 275.207, + "ch2cl2": 276.171, + "dmso": 276.988, + "h2o": 262.671, + "methanol": 263.366, + "thf": 278.685, + "toluene": 283.761, + }, + "pbe0": { + "gas": 277.761, + "acetone": 262.673, + "chcl3": 263.634, + "acetonitrile": 261.631, + "ch2cl2": 262.58, + "dmso": 263.406, + "h2o": 249.27, + "methanol": 249.931, + "thf": 265.061, + "toluene": 270.123, + }, + "dsd-blyp": { + "gas": 299.195, + "acetone": 286.35, + "chcl3": 287.213, + "acetonitrile": 285.469, + "ch2cl2": 286.302, + "dmso": 286.997, + "h2o": 274.843, + "methanol": 275.42, + "thf": 288.362, + "toluene": 292.724, + }, + "wb97x": { + "gas": 277.52, + "acetone": 262.317, + "chcl3": 263.295, + "acetonitrile": 261.26, 
+ "ch2cl2": 262.227, + "dmso": 263.036, + "h2o": 248.805, + "methanol": 249.485, + "thf": 264.716, + "toluene": 269.816, + }, + "pbeh-3c": { + "gas": 390.602, + "acetone": 379.7, + "chcl3": 380.279, + "acetonitrile": 378.978, + "ch2cl2": 379.593, + "dmso": 380.317, + "h2o": 368.831, + "methanol": 369.216, + "thf": 381.391, + "toluene": 384.986, + }, + "kt2": { + "gas": 297.198, + "acetone": 281.884, + "chcl3": 282.896, + "acetonitrile": 280.816, + "ch2cl2": 281.794, + "dmso": 282.606, + "h2o": 268.382, + "methanol": 269.076, + "thf": 284.334, + "toluene": 289.473, + }, + }, + "b97-3c": { + "tpss": { + "gas": 286.404, + "acetone": 270.748, + "chcl3": 271.725, + "acetonitrile": 269.462, + "ch2cl2": 270.524, + "dmso": 271.355, + "h2o": 256.342, + "methanol": 257.122, + "thf": 273.469, + "toluene": 278.676, + }, + "pbe0": { + "gas": 272.706, + "acetone": 257.164, + "chcl3": 258.119, + "acetonitrile": 255.895, + "ch2cl2": 256.94, + "dmso": 257.797, + "h2o": 242.92, + "methanol": 243.667, + "thf": 259.855, + "toluene": 264.973, + }, + "dsd-blyp": { + "gas": 294.405, + "acetone": 281.158, + "chcl3": 282.018, + "acetonitrile": 280.073, + "ch2cl2": 280.993, + "dmso": 281.703, + "h2o": 269.086, + "methanol": 269.737, + "thf": 283.464, + "toluene": 287.882, + }, + "wb97x": { + "gas": 272.595, + "acetone": 256.861, + "chcl3": 257.836, + "acetonitrile": 255.578, + "ch2cl2": 256.643, + "dmso": 257.483, + "h2o": 242.627, + "methanol": 243.389, + "thf": 259.577, + "toluene": 264.773, + }, + "pbeh-3c": { + "gas": 385.991, + "acetone": 374.828, + "chcl3": 375.394, + "acetonitrile": 373.92, + "ch2cl2": 374.61, + "dmso": 375.349, + "h2o": 363.431, + "methanol": 363.874, + "thf": 376.762, + "toluene": 380.401, + }, + "kt2": { + "gas": 292.227, + "acetone": 276.414, + "chcl3": 277.413, + "acetonitrile": 275.12, + "ch2cl2": 276.191, + "dmso": 277.05, + "h2o": 262.135, + "methanol": 262.912, + "thf": 279.163, + "toluene": 284.4, + }, + }, + "tpss": { + "tpss": { + "gas": 286.331, + 
"acetone": 271.022, + "chcl3": 271.947, + "acetonitrile": 269.751, + "ch2cl2": 270.768, + "dmso": 271.616, + "h2o": 256.882, + "methanol": 257.6, + "thf": 273.659, + "toluene": 278.687, + }, + "pbe0": { + "gas": 272.619, + "acetone": 257.298, + "chcl3": 258.198, + "acetonitrile": 256.053, + "ch2cl2": 257.051, + "dmso": 257.926, + "h2o": 243.408, + "methanol": 244.095, + "thf": 259.935, + "toluene": 264.977, + }, + "dsd-blyp": { + "gas": 294.334, + "acetone": 281.319, + "chcl3": 282.131, + "acetonitrile": 280.265, + "ch2cl2": 281.144, + "dmso": 281.852, + "h2o": 269.472, + "methanol": 270.068, + "thf": 283.556, + "toluene": 287.875, + }, + "wb97x": { + "gas": 272.586, + "acetone": 257.148, + "chcl3": 258.069, + "acetonitrile": 255.901, + "ch2cl2": 256.919, + "dmso": 257.755, + "h2o": 243.195, + "methanol": 243.894, + "thf": 259.785, + "toluene": 264.863, + }, + "pbeh-3c": { + "gas": 385.897, + "acetone": 374.881, + "chcl3": 375.407, + "acetonitrile": 373.999, + "ch2cl2": 374.652, + "dmso": 375.391, + "h2o": 363.697, + "methanol": 364.097, + "thf": 376.757, + "toluene": 380.319, + }, + "kt2": { + "gas": 292.105, + "acetone": 276.574, + "chcl3": 277.519, + "acetonitrile": 275.313, + "ch2cl2": 276.339, + "dmso": 277.197, + "h2o": 262.553, + "methanol": 263.276, + "thf": 279.247, + "toluene": 284.37, + }, + }, + }, + } + si_tm_shieldings = { + "TMS": { + "pbeh-3c": { + "tpss": { + "gas": 334.2579542, + "acetone": 334.1639413, + "chcl3": 334.1459912, + "acetonitrile": 334.1644763, + "ch2cl2": 334.143167, + "dmso": 334.2355086, + "h2o": 334.1700712, + "methanol": 334.1638302, + "thf": 334.1765686, + "toluene": 334.1672644, + }, + "pbe0": { + "gas": 332.1432161, + "acetone": 332.0806043, + "chcl3": 332.027555, + "acetonitrile": 332.070525, + "ch2cl2": 332.0181509, + "dmso": 332.1389588, + "h2o": 332.0768365, + "methanol": 332.082777, + "thf": 332.0989747, + "toluene": 332.0655251, + }, + "pbeh-3c": { + "gas": 425.4500968, + "acetone": 425.4194168, + "chcl3": 425.3783658, + 
"acetonitrile": 425.4187809, + "ch2cl2": 425.3492293, + "dmso": 425.4302912, + "h2o": 425.4004059, + "methanol": 425.3865089, + "thf": 425.4157351, + "toluene": 425.4555181, + }, + }, + "b97-3c": { + "tpss": { + "gas": 334.5698984, + "acetone": 334.0803779, + "chcl3": 334.1093328, + "acetonitrile": 334.0665281, + "ch2cl2": 334.1280337, + "dmso": 334.1272572, + "h2o": 334.0495564, + "methanol": 334.1137413, + "thf": 334.1251606, + "toluene": 334.1235476, + }, + "pbe0": { + "gas": 332.3546979, + "acetone": 331.9058869, + "chcl3": 331.8955148, + "acetonitrile": 331.8800833, + "ch2cl2": 331.9140658, + "dmso": 331.948424, + "h2o": 331.8617288, + "methanol": 331.9375391, + "thf": 331.9562723, + "toluene": 331.9253075, + }, + "pbeh-3c": { + "gas": 426.0062656, + "acetone": 425.7811084, + "chcl3": 425.7602588, + "acetonitrile": 425.745999, + "ch2cl2": 425.7473718, + "dmso": 425.779427, + "h2o": 425.7365851, + "methanol": 425.7713265, + "thf": 425.7964293, + "toluene": 425.8200844, + }, + }, + "tpss": { + "tpss": { + "gas": 333.7779314, + "acetone": 333.3511708, + "chcl3": 333.3794838, + "acetonitrile": 333.3298692, + "ch2cl2": 333.3946486, + "dmso": 333.3881767, + "h2o": 333.3406562, + "methanol": 333.3784136, + "thf": 333.3860666, + "toluene": 333.3885135, + }, + "pbe0": { + "gas": 331.5820841, + "acetone": 331.1904714, + "chcl3": 331.1839521, + "acetonitrile": 331.1565218, + "ch2cl2": 331.1982524, + "dmso": 331.2347884, + "h2o": 331.1670301, + "methanol": 331.2231923, + "thf": 331.2383692, + "toluene": 331.2108329, + }, + "pbeh-3c": { + "gas": 425.0726297, + "acetone": 424.9009564, + "chcl3": 424.8706079, + "acetonitrile": 424.8831877, + "ch2cl2": 424.8554965, + "dmso": 424.9143792, + "h2o": 424.8579037, + "methanol": 424.8851226, + "thf": 424.9146175, + "toluene": 424.9330242, + }, + }, + } + } + si_orca_shieldings = { + "TMS": { + "pbeh-3c": { + "tpss": { + "gas": 344.281, + "acetone": 344.239, + "chcl3": 344.311, + "acetonitrile": 344.198, + "ch2cl2": 344.231, + 
"dmso": 344.292, + "h2o": 344.228, + "methanol": 344.291, + "thf": 344.283, + "toluene": 344.452, + }, + "pbe0": { + "gas": 332.181, + "acetone": 332.067, + "chcl3": 332.162, + "acetonitrile": 332.033, + "ch2cl2": 332.082, + "dmso": 332.122, + "h2o": 332.048, + "methanol": 332.122, + "thf": 332.134, + "toluene": 332.298, + }, + "dsd-blyp": { + "gas": 357.874, + "acetone": 357.762, + "chcl3": 357.864, + "acetonitrile": 357.726, + "ch2cl2": 357.783, + "dmso": 357.798, + "h2o": 357.715, + "methanol": 357.809, + "thf": 357.826, + "toluene": 358.001, + }, + "wb97x": { + "gas": 335.739, + "acetone": 335.641, + "chcl3": 335.74, + "acetonitrile": 335.606, + "ch2cl2": 335.659, + "dmso": 335.687, + "h2o": 335.608, + "methanol": 335.692, + "thf": 335.707, + "toluene": 335.879, + }, + "pbeh-3c": { + "gas": 425.385, + "acetone": 425.52, + "chcl3": 425.527, + "acetonitrile": 425.511, + "ch2cl2": 425.508, + "dmso": 425.578, + "h2o": 425.566, + "methanol": 425.557, + "thf": 425.54, + "toluene": 425.556, + }, + "kt2": { + "gas": 341.186, + "acetone": 341.197, + "chcl3": 341.284, + "acetonitrile": 341.166, + "ch2cl2": 341.208, + "dmso": 341.263, + "h2o": 341.201, + "methanol": 341.253, + "thf": 341.263, + "toluene": 341.446, + }, + }, + "b97-3c": { + "tpss": { + "gas": 344.503, + "acetone": 344.558, + "chcl3": 344.676, + "acetonitrile": 344.487, + "ch2cl2": 344.537, + "dmso": 344.67, + "h2o": 344.542, + "methanol": 344.662, + "thf": 344.637, + "toluene": 344.919, + }, + "pbe0": { + "gas": 332.338, + "acetone": 332.293, + "chcl3": 332.442, + "acetonitrile": 332.236, + "ch2cl2": 332.31, + "dmso": 332.4, + "h2o": 332.288, + "methanol": 332.392, + "thf": 332.403, + "toluene": 332.676, + }, + "dsd-blyp": { + "gas": 357.729, + "acetone": 357.628, + "chcl3": 357.774, + "acetonitrile": 357.578, + "ch2cl2": 357.655, + "dmso": 357.692, + "h2o": 357.632, + "methanol": 357.703, + "thf": 357.725, + "toluene": 357.985, + }, + "wb97x": { + "gas": 335.744, + "acetone": 335.688, + "chcl3": 335.837, 
+ "acetonitrile": 335.633, + "ch2cl2": 335.71, + "dmso": 335.774, + "h2o": 335.704, + "methanol": 335.776, + "thf": 335.792, + "toluene": 336.064, + }, + "pbeh-3c": { + "gas": 425.911, + "acetone": 426.14, + "chcl3": 426.185, + "acetonitrile": 426.113, + "ch2cl2": 426.124, + "dmso": 426.254, + "h2o": 426.162, + "methanol": 426.22, + "thf": 426.196, + "toluene": 426.294, + }, + "kt2": { + "gas": 341.631, + "acetone": 341.666, + "chcl3": 341.811, + "acetonitrile": 341.61, + "ch2cl2": 341.676, + "dmso": 341.798, + "h2o": 341.602, + "methanol": 341.777, + "thf": 341.781, + "toluene": 342.086, + }, + }, + "tpss": { + "tpss": { + "gas": 343.24, + "acetone": 343.388, + "chcl3": 343.506, + "acetonitrile": 343.343, + "ch2cl2": 343.385, + "dmso": 343.48, + "h2o": 343.378, + "methanol": 343.47, + "thf": 343.449, + "toluene": 343.647, + }, + "pbe0": { + "gas": 331.055, + "acetone": 331.217, + "chcl3": 331.313, + "acetonitrile": 331.175, + "ch2cl2": 331.224, + "dmso": 331.303, + "h2o": 331.205, + "methanol": 331.296, + "thf": 331.293, + "toluene": 331.461, + }, + "dsd-blyp": { + "gas": 357.099, + "acetone": 357.125, + "chcl3": 357.231, + "acetonitrile": 357.081, + "ch2cl2": 357.141, + "dmso": 357.179, + "h2o": 357.075, + "methanol": 357.188, + "thf": 357.195, + "toluene": 357.379, + }, + "wb97x": { + "gas": 334.802, + "acetone": 334.886, + "chcl3": 334.987, + "acetonitrile": 334.842, + "ch2cl2": 334.897, + "dmso": 334.957, + "h2o": 334.855, + "methanol": 334.958, + "thf": 334.959, + "toluene": 335.134, + }, + "pbeh-3c": { + "gas": 424.346, + "acetone": 424.653, + "chcl3": 424.66, + "acetonitrile": 424.64, + "ch2cl2": 424.633, + "dmso": 424.74, + "h2o": 424.718, + "methanol": 424.709, + "thf": 424.681, + "toluene": 424.701, + }, + "kt2": { + "gas": 340.026, + "acetone": 340.228, + "chcl3": 340.311, + "acetonitrile": 340.189, + "ch2cl2": 340.226, + "dmso": 340.332, + "h2o": 340.207, + "methanol": 340.311, + "thf": 340.302, + "toluene": 340.453, + }, + }, + } + } + + if 
config.solvent != "gas": + # optimization in solvent: + if config.prog == "tm" and config.sm2 == "cosmo": + print( + "WARNING: The geometry optimization of the reference molecule " + "was calculated with DCOSMO-RS instead of COSMO as solvent " + "model (sm2)!" + ) + elif config.prog == "orca" and config.sm2 == "cpcm": + print( + "WARNING: The geometry optimization of the reference molecule " + "was calculated with SMD instead of CPCM as solvent model (sm2)!" + ) + if config.prog4_s == "tm": + h_qm_shieldings = h_tm_shieldings + c_qm_shieldings = c_tm_shieldings + f_qm_shieldings = f_tm_shieldings + p_qm_shieldings = p_tm_shieldings + si_qm_shieldings = si_tm_shieldings + lsm = "DCOSMO-RS" + lsm4 = "DCOSMO-RS" + lbasisS = "def2-TZVP" + if config.sm4_s == "cosmo": + print( + "WARNING: The reference shielding constant was calculated with DCOSMORS " + "instead of COSMO as solvent model (sm4_s)!" + ) + elif config.prog4_s == "orca": + lsm = "SMD" + lsm4 = "SMD" + lbasisS = "def2-TZVP" + h_qm_shieldings = h_orca_shieldings + c_qm_shieldings = c_orca_shieldings + f_qm_shieldings = f_orca_shieldings + p_qm_shieldings = p_orca_shieldings + si_qm_shieldings = si_orca_shieldings + if config.sm4_s == "cpcm": + print( + "WARNING: The reference shielding was calculated with SMD " + "instead of CPCM as solvent model (sm4_2)!" + ) + if config.func_s == "pbeh-3c": + lbasisS = "def2-mSVP" + + if config.basis_s != "def2-TZVP" and config.func_s != "pbeh-3c": + print( + "WARNING: The reference shielding constant was calculated with the " + "basis def2-TZVP (basisS)!" + ) + if config.func == "r2scan-3c": + print( + "WARNING: The reference shielding constants is not available for r2scan-3c and b97-3c is used instead!" 
+ ) + opt_func = "b97-3c" + else: + opt_func = config.func + + # get absolute shielding constant of reference + prnterr = False + try: + hshielding = "{:4.3f}".format( + h_qm_shieldings[config.h_ref][opt_func][config.func_s][config.solvent] + ) + except KeyError: + hshielding = 0 + prnterr = True + try: + cshielding = "{:4.3f}".format( + c_qm_shieldings[config.c_ref][opt_func][config.func_s][config.solvent] + ) + except KeyError: + cshielding = 0 + prnterr = True + try: + fshielding = "{:4.3f}".format( + f_qm_shieldings[config.f_ref][opt_func][config.func_s][config.solvent] + ) + except KeyError: + fshielding = 0 + prnterr = True + try: + pshielding = "{:4.3f}".format( + p_qm_shieldings[config.p_ref][opt_func][config.func_s][config.solvent] + ) + except KeyError: + pshielding = 0 + prnterr = True + try: + sishielding = "{:4.3f}".format( + si_qm_shieldings[config.si_ref][opt_func][config.func_s][config.solvent] + ) + except KeyError: + sishielding = 0 + prnterr = True + if prnterr: + prnterr = ( + "ERROR! The reference absolute shielding constant " + "could not be found!\n You have to edit the file" + " .anmrrc by hand!" 
+ ) + print(prnterr) + element_ref_shield = { + "h": float(hshielding), + "c": float(cshielding), + "f": float(fshielding), + "p": float(pshielding), + "si": float(sishielding), + } + + # for elementactive + exch = {True: 1, False: 0} + exchonoff = {True: "on", False: "off"} + # write .anmrrc + with open(os.path.join(config.cwd, ".anmrrc"), "w", newline=None) as arc: + arc.write("7 8 XH acid atoms\n") + if config.resonance_frequency is not None: + arc.write( + "ENSO qm= {} mf= {} lw= 1.0 J= {} S= {} T= {:6.2f} \n".format( + str(config.prog4_s).upper(), + str(config.resonance_frequency), + exchonoff[config.couplings], + exchonoff[config.shieldings], + float(config.temperature), + ) + ) + else: + arc.write("ENSO qm= {} lw= 1.2\n".format(str(config.prog4_s).upper())) + try: + length = max( + [ + len(i) + for i in [ + hshielding, + cshielding, + fshielding, + pshielding, + sishielding, + ] + ] + ) + except: + length = 6 + # lsm4 --> localsm4 ... + arc.write( + "{}[{}] {}[{}]/{}//{}[{}]/{}\n".format( + config.h_ref, + config.solvent, + config.func_s, + lsm4, + lbasisS, + opt_func, + lsm, + config.basis, + ) + ) + arc.write( + "1 {:{digits}} 0.0 {}\n".format( + hshielding, exch[config.h_active], digits=length + ) + ) # hydrogen + arc.write( + "6 {:{digits}} 0.0 {}\n".format( + cshielding, exch[config.c_active], digits=length + ) + ) # carbon + arc.write( + "9 {:{digits}} 0.0 {}\n".format( + fshielding, exch[config.f_active], digits=length + ) + ) # fluorine + arc.write( + "14 {:{digits}} 0.0 {}\n".format( + sishielding, exch[config.si_active], digits=length + ) + ) # silicon + arc.write( + "15 {:{digits}} 0.0 {}\n".format( + pshielding, exch[config.p_active], digits=length + ) + ) # phosphorus + return element_ref_shield diff --git a/docs/documentation.rst b/docs/documentation.rst new file mode 100644 index 0000000..93ac12f --- /dev/null +++ b/docs/documentation.rst @@ -0,0 +1,5 @@ +CENSO - Commandline ENergetic SOrting of Conformer Rotamer Ensembles 
+==================================================================== + + +assets folder (to store dcosmors potential files) diff --git a/docs/example.rst b/docs/example.rst new file mode 100644 index 0000000..e69de29 diff --git a/docs/src/solvents.png b/docs/src/solvents.png new file mode 100644 index 0000000..4fed872 Binary files /dev/null and b/docs/src/solvents.png differ diff --git a/docs/src/solvents.svg b/docs/src/solvents.svg new file mode 100644 index 0000000..96dfc3e --- /dev/null +++ b/docs/src/solvents.svg @@ -0,0 +1,580 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + { "acetone":{ "cosmors": ["propanone_c0", "propanone_c0"], "dcosmors": ["propanone", "propanone"], "xtb": ["acetone", "acetone"], "cpcm": ["acetone", "acetone"], "smd": ["ACETONE", "ACETONE"], "DC": 20.7 }, "benzaldehyde":{ "cosmors": ["benzaldehyde_c0", "benzaldehyde_c0"], "dcosmors": [null, "propanone"], "xtb": ["benzaldehyde", "benzaldehyde"], "cpcm": [null, "Pyridine"], "smd": ["BENZALDEHYDE", "BENZALDEHYDE"], "DC": 18.2 }, "benzene":{ "cosmors": ["benzene_c0", "benzene_c0"], "dcosmors": [null, "toluene"], "xtb": ["benzene", "benzene"], "cpcm": ["Benzene", "Benzene"], "smd": ["BENZENE", "BENZENE"], "DC": 2.3 }} + + solvent name in censo + dielectric constant (ɛ​) + + names of the parameter files + without file extension + + + + + + + + + + solvent names in each + solvent model + + + + + + + + not all solvents are available and "replacement" solvents can be chosen + + diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..75282c5 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,28 @@ +[metadata] +name = censo-QM +version = 0.0.15 +description = CENSO - Commandline ENergetic SOrting for conformer rotamer ensembles +long_description = file: README.rst +long_description_content_type = text/x-rst +author = Fabian Bohle +author_email = 'tbd' +url = 'tbd' +license = LGPL3 +classifiers = + Intended Audience :: 
Science/Research + Operating System :: POSIX :: Linux + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Topic :: Scientific/Engineering :: Chemistry + +[options] +packages = find: +tests_require = + pytest +python_requires = >=3.6 + +[options.entry_points] +console_scripts = + censo = censo_qm.censo:main diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..fc1f76c --- /dev/null +++ b/setup.py @@ -0,0 +1,3 @@ +from setuptools import setup + +setup() \ No newline at end of file