Skip to content

Commit

Permalink
add init-reaction docs and args (#758)
Browse files Browse the repository at this point in the history
* add init-reaction docs and args

The commit includes:
- add docs
- add detailed arguments
- add strict argument checking
- strict check the init-reaction example

* init-reaction -> init_reaction

* fix links

* add the example to doc

* add Geom=PrintInputOrient to keywords
  • Loading branch information
njzjz authored Jul 5, 2022
1 parent 2e82464 commit 48f2abe
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 3 deletions.
2 changes: 2 additions & 0 deletions doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ DPGEN's documentation

init/init-bulk-mdata
init/init-surf-mdata
init/init-reaction
init/init-reaction-jdata
init/init-reaction-mdata

.. _autotest::
Expand Down
6 changes: 6 additions & 0 deletions doc/init/init-reaction-jdata.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
dpgen init_reaction parameters
======================================

.. dargs::
:module: dpgen.data.arginfo
:func: init_reaction_jdata_arginfo
21 changes: 21 additions & 0 deletions doc/init/init-reaction.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# init_reaction

`dpgen init_reaction` is a workflow to initialize data for reactive systems of small gas-phase molecules. The workflow was introduced in the "Initialization" section of [Energy & Fuels, 2021, 35 (1), 762–769](https://doi.org/10.1021/acs.energyfuels.0c03211).

To start the workflow, one needs a box containing reactive systems. The following packages are required for each of the steps:
- Exploring: [LAMMPS](https://github.com/lammps/lammps)
- Sampling: [MDDatasetBuilder](https://github.com/tongzhugroup/mddatasetbuilder)
- Labeling: [Gaussian](https://gaussian.com/)

The Exploring step uses LAMMPS [pair_style reaxff](https://docs.lammps.org/latest/pair_reaxff.html) to run a short ReaxFF NVT MD simulation. In the Sampling step, molecular clusters are taken from the trajectory and the k-means clustering algorithm is applied to remove redundancy, as described in [Nature Communications, 11, 5713 (2020)](https://doi.org/10.1038/s41467-020-19497-z). The Labeling step calculates energies and forces using the Gaussian package.

An example of `reaction.json` is given below:

```{literalinclude} ../../examples/init/reaction.json
:language: json
:linenos:
```

For detailed parameters, see [parameters](init-reaction-jdata.rst) and [machine parameters](init-reaction-mdata.rst).

The generated data can be used to continue the DP-GEN concurrent learning workflow. Read [Energy & Fuels, 2021, 35 (1), 762–769](https://doi.org/10.1021/acs.energyfuels.0c03211) for details.
43 changes: 42 additions & 1 deletion dpgen/data/arginfo.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from dargs import Argument
from dargs import Argument, ArgumentEncoder

from dpgen.arginfo import general_mdata_arginfo

Expand Down Expand Up @@ -34,3 +34,44 @@ def init_reaction_mdata_arginfo() -> Argument:
arginfo
"""
return general_mdata_arginfo("init_reaction_mdata", ("reaxff", "build", "fp"))


def init_reaction_jdata_arginfo() -> Argument:
    """Generate arginfo for dpgen init_reaction jdata.

    The returned schema describes the top-level keys ``type_map``,
    ``reaxff`` (a nested dict of ReaxFF MD settings), ``cutoff``,
    ``dataset_size`` and ``qmkeywords``.

    Returns
    -------
    Argument
        dpgen init_reaction jdata arginfo
    """
    doc_init_reaction = "Generate initial data for reactive systems for small gas-phase molecules, from a ReaxFF NVT MD trajectory."
    doc_type_map = 'Type map, which should match types in the initial data. e.g. ["C", "H", "O"]'
    doc_reaxff = "Parameters for ReaxFF NVT MD."
    doc_data = "Path to initial LAMMPS data file. The atom_style should be charge."
    doc_ff = "Path to ReaxFF force field file. Available in the lammps/potentials directory."
    doc_control = "Path to ReaxFF control file."
    doc_temp = "Target Temperature for the NVT MD simulation. Unit: K."
    doc_dt = "Real time for every time step. Unit: fs."
    # The original sentence was truncated ("how rapidly the temperature.").
    doc_tau_t = "Time to determine how rapidly the temperature is relaxed. Unit: fs."
    doc_dump_freq = "Frequency of time steps to collect trajectory."
    doc_nstep = "Total steps to run the ReaxFF MD simulation."
    doc_cutoff = "Cutoff radius to take clusters from the trajectory. Note that only a complete molecule or free radical will be taken."
    doc_dataset_size = "Collected dataset size for each bond type."
    doc_qmkeywords = 'Gaussian keywords for first-principle calculations. e.g. force mn15/6-31g** Geom=PrintInputOrient. Note that "force" job is necessary to collect data. Geom=PrintInputOrient should be used when there are more than 50 atoms in a cluster.'

    reaxff_args = [
        Argument("data", str, doc=doc_data),
        Argument("ff", str, doc=doc_ff),
        Argument("control", str, doc=doc_control),
        Argument("temp", [float, int], doc=doc_temp),
        Argument("dt", [float, int], doc=doc_dt),
        Argument("tau_t", [float, int], doc=doc_tau_t),
        Argument("dump_freq", int, doc=doc_dump_freq),
        Argument("nstep", int, doc=doc_nstep),
    ]

    return Argument("init_reaction_jdata", dict, [
        Argument("type_map", list, doc=doc_type_map),
        Argument("reaxff", dict, reaxff_args, doc=doc_reaxff),
        # Accept ints as well as floats, like temp/dt/tau_t above, so an
        # integer-valued cutoff (e.g. 4) is not rejected by the type check.
        Argument("cutoff", [float, int], doc=doc_cutoff),
        Argument("dataset_size", int, doc=doc_dataset_size),
        Argument("qmkeywords", str, doc=doc_qmkeywords),
    ], doc=doc_init_reaction)
6 changes: 5 additions & 1 deletion dpgen/data/reaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from dpgen.dispatcher.Dispatcher import make_submission_compat
from dpgen.remote.decide_machine import convert_mdata
from dpgen.generator.run import create_path, make_fp_task_name
from dpgen.util import sepline
from dpgen.util import sepline, normalize
from .arginfo import init_reaction_jdata_arginfo

reaxff_path = "00.reaxff"
build_path = "01.build"
Expand Down Expand Up @@ -207,6 +208,9 @@ def gen_init_reaction(args):
with open(args.MACHINE, "r") as fp:
mdata = json.load(fp)

jdata_arginfo = init_reaction_jdata_arginfo()
jdata = normalize(jdata_arginfo, jdata)

mdata = convert_mdata(mdata, ["reaxff", "build", "fp"])
record = "record.reaction"
iter_rec = -1
Expand Down
23 changes: 23 additions & 0 deletions dpgen/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from typing import Union, List
from pathlib import Path

from dargs import Argument

from dpgen import dlog

"""
Expand Down Expand Up @@ -47,3 +49,24 @@ def expand_sys_str(root_dir: Union[str, Path]) -> List[str]:
if (root_dir / "type.raw").is_file():
matches.append(str(root_dir))
return matches

def normalize(arginfo: Argument, data: dict, strict_check: bool = True) -> dict:
    """Normalize input data against a schema, then validate the result.

    Parameters
    ----------
    arginfo : dargs.Argument
        argument schema used for both normalization and checking
    data : dict
        raw input data
    strict_check : bool, default=True
        forwarded as ``strict`` to ``arginfo.check_value``

    Returns
    -------
    dict
        the value returned by ``arginfo.normalize_value``
    """
    # Normalize first so that the check runs on the normalized value.
    normalized = arginfo.normalize_value(data, trim_pattern="_*")
    arginfo.check_value(normalized, strict=strict_check)
    return normalized
2 changes: 1 addition & 1 deletion examples/init/reaction.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@
},
"cutoff": 3.5,
"dataset_size": 100,
"qmkeywords": "b3lyp/6-31g** force"
"qmkeywords": "b3lyp/6-31g** force Geom=PrintInputOrient"
}
32 changes: 32 additions & 0 deletions tests/test_check_examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""This module ensures input in the examples directory
could pass the argument checking.
"""
import unittest
import json
from pathlib import Path

from dpgen.util import normalize
from dpgen.data.arginfo import (
init_reaction_jdata_arginfo,
)

# Argument schema for the ``dpgen init_reaction`` parameter file, built once
# at import time.
init_reaction_jdata = init_reaction_jdata_arginfo()

# directory of examples
p_examples = Path(__file__).parent.parent / "examples"

# input_files : tuple[tuple[Argument, Path]]
# Pairs of (argument schema, example input file); each file is loaded as
# JSON and checked against its schema by TestExamples.test_arguments.
input_files = (
(init_reaction_jdata, p_examples / "init" / "reaction.json"),
)


class TestExamples(unittest.TestCase):
    """Check that every registered example input passes argument checking."""

    def test_arguments(self):
        """Load each example as JSON and run it through ``normalize``."""
        for schema, example_path in input_files:
            label = str(example_path)
            # One sub-test per file so a failure names the offending example.
            with self.subTest(fn=label):
                with open(label) as handle:
                    content = json.load(handle)
                normalize(schema, content)

0 comments on commit 48f2abe

Please sign in to comment.