From 60238992104933493737855f9a64fb23b854b19d Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Thu, 9 Jun 2022 00:51:49 -0400
Subject: [PATCH 01/26] refactor simplify (#730)

* refactor simplify

1. Used `dp model-devi` to calculate the model deviation instead of computing it locally. Supported by deepmodeling/deepmd-kit#1618, released in v2.1.1; versions earlier than 2.1.1 are no longer supported.
2. Assumed all systems are MultiSystems.
3. Removed energy model deviation support.

* expand path when getting multisystems
* let `make_train` and `run_train` expand paths
* load numpy array instead
* use dpdata to get nframes
* fix tests
* update README
---
 README.md                  |   6 +-
 dpgen/generator/run.py     |  70 +++----
 dpgen/simplify/simplify.py | 381 +++++++++----------------------
 dpgen/util.py              |  22 +++
 4 files changed, 140 insertions(+), 339 deletions(-)

diff --git a/README.md b/README.md
index b59725ec9..c833ed059 100644
--- a/README.md
+++ b/README.md
@@ -499,9 +499,8 @@ The bold notation of key (such as **type_map**) means that it's a necessary key
 | **use_ele_temp** | int | 0 | Currently only supports fp_style vasp. 0 (default): no electron temperature. 1: electron temperature as frame parameter. 2: electron temperature as atom parameter.
 | *#Data*
 | init_data_prefix | String | "/sharedext4/.../data/" | Prefix of initial data directories
- | ***init_data_sys*** | List of string|["CH4.POSCAR.01x01x01/.../deepmd"] |Directories of initial data. You may use either absolute or relative path here.
+ | ***init_data_sys*** | List of string|["CH4.POSCAR.01x01x01/.../deepmd"] |Directories of initial data. You may use either an absolute or a relative path here. Systems will be detected recursively in the directories.
 | ***sys_format*** | String | "vasp/poscar" | Format of initial data. It will be `vasp/poscar` if not set.
- | init_multi_systems | Boolean | false | If set to `true`, `init_data_sys` directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as inital data systems.
 | init_batch_size | String of integer | [8] | Each number is the batch_size of the corresponding system for training in `init_data_sys`. One recommended rule for setting `sys_batch_size` and `init_batch_size` is that `batch_size` multiplied by the number of atoms of the structure should be larger than 32. If set to `auto`, the batch size will be 32 divided by the number of atoms. |
 | sys_configs_prefix | String | "/sharedext4/.../data/" | Prefix of `sys_configs`
 | **sys_configs** | List of list of string | [
["/sharedext4/.../POSCAR"],
["....../POSCAR"]
] | Containing directories of structures to be explored in iterations. Wildcard characters are supported here. |
@@ -1086,7 +1085,6 @@ Here is an example of `param.json` for QM7 dataset:
     },
     "_comment": "that's all"
   },
-  "use_clusters": true,
   "fp_style": "gaussian",
   "shuffle_poscar": false,
   "fp_task_max": 1000,
@@ -1109,7 +1107,7 @@ Here is an example of `param.json` for QM7 dataset:
 }
 ```
 
-Here `pick_data` is the data to simplify and currently only supports `MultiSystems` containing `System` with `deepmd/npy` format, and `use_clusters` should always be `true`. `init_pick_number` and `iter_pick_number` are the numbers of picked frames. `e_trust_lo`, `e_trust_hi` mean the range of the deviation of the frame energy, and `f_trust_lo` and `f_trust_hi` mean the range of the max deviation of atomic forces in a frame. `fp_style` can only be `gaussian` currently. Other parameters are as the same as those of generator.
+Here `pick_data` is the directory containing the data to simplify; the program recursively detects systems (`System`) in `deepmd/npy` format under it. `init_pick_number` and `iter_pick_number` are the numbers of picked frames. `e_trust_lo` and `e_trust_hi` give the range of the deviation of the frame energy, and `f_trust_lo` and `f_trust_hi` give the range of the maximum deviation of atomic forces in a frame. `fp_style` can only be `gaussian` currently. Other parameters are the same as those of the generator.
 
 ## Set up machine
 
diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py
index d06c137b3..1bd196cc6 100644
--- a/dpgen/generator/run.py
+++ b/dpgen/generator/run.py
@@ -61,7 +61,7 @@
 from dpgen.generator.lib.ele_temp import NBandsEsti
 from dpgen.remote.decide_machine import convert_mdata
 from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks, make_dispatcher, make_submission
-from dpgen.util import sepline
+from dpgen.util import sepline, expand_sys_str
 from dpgen import ROOT_PATH
 from pymatgen.io.vasp import Incar,Kpoints,Potcar
 from dpgen.auto_test.lib.vasp import make_kspacing_kpoints
@@ -288,13 +288,10 @@ def make_train (iter_index,
     # make sure all init_data_sys has the batch size -- for the following `zip`
     assert (len(init_data_sys_) <= len(init_batch_size_))
     for ii, ss in zip(init_data_sys_, init_batch_size_) :
-        if jdata.get('init_multi_systems', False):
-            for single_sys in os.listdir(os.path.join(work_path, 'data.init', ii)):
-                init_data_sys.append(os.path.join('..', 'data.init', ii, single_sys))
-                init_batch_size.append(detect_batch_size(ss, os.path.join(work_path, 'data.init', ii, single_sys)))
-        else:
-            init_data_sys.append(os.path.join('..', 'data.init', ii))
-            init_batch_size.append(detect_batch_size(ss, os.path.join(work_path, 'data.init', ii)))
+        sys_paths = expand_sys_str(os.path.join(init_data_prefix, ii))
+        for single_sys in sys_paths:
+            init_data_sys.append(os.path.normpath(os.path.join('..', 'data.init', ii, os.path.relpath(single_sys, os.path.join(init_data_prefix, ii)))))
+            init_batch_size.append(detect_batch_size(ss, single_sys))
     old_range = None
     if iter_index > 0 :
         for ii in range(iter_index) :
@@ -308,25 +305,16 @@ def make_train (iter_index,
         sys_batch_size = ["auto" for aa in range(len(sys_list))]
         for jj in fp_data_sys :
             sys_idx = int(jj.split('.')[-1])
-            if jdata.get('use_clusters', False):
-                nframes = 0
-                for sys_single in os.listdir(jj):
-                    tmp_box = np.loadtxt(os.path.join(jj, sys_single, 'box.raw'))
-                    tmp_box = np.reshape(tmp_box, [-1,9])
-                    nframes += tmp_box.shape[0]
-                if nframes < fp_task_min :
-                    log_task('nframes (%d) in data sys %s is too small, skip' % 
(nframes, jj)) - continue - for sys_single in os.listdir(jj): - init_data_sys.append(os.path.join('..', 'data.iters', jj, sys_single)) - init_batch_size.append(detect_batch_size(sys_batch_size[sys_idx], os.path.join(jj, sys_single))) - else: - nframes = dpdata.System(jj, 'deepmd/npy').get_nframes() - if nframes < fp_task_min : - log_task('nframes (%d) in data sys %s is too small, skip' % (nframes, jj)) - continue - init_data_sys.append(os.path.join('..', 'data.iters', jj)) - init_batch_size.append(detect_batch_size(sys_batch_size[sys_idx], jj)) + sys_paths = expand_sys_str(jj) + nframes = 0 + for sys_single in sys_paths: + nframes += dpdata.LabeledSystem(sys_single, fmt="deepmd/npy").get_nframes() + if nframes < fp_task_min : + log_task('nframes (%d) in data sys %s is too small, skip' % (nframes, jj)) + continue + for sys_single in sys_paths: + init_data_sys.append(os.path.normpath(os.path.join('..', 'data.iters', sys_single))) + init_batch_size.append(detect_batch_size(sys_batch_size[sys_idx], sys_single)) # establish tasks jinput = jdata['default_training_param'] try: @@ -568,25 +556,17 @@ def run_train (iter_index, os.chdir(work_path) fp_data = glob.glob(os.path.join('data.iters', 'iter.*', '02.fp', 'data.*')) for ii in init_data_sys : - if jdata.get('init_multi_systems', False): - for single_sys in os.listdir(os.path.join(ii)): - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'set.*')) - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'type*.raw')) - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'nopbc')) - else: - trans_comm_data += glob.glob(os.path.join(ii, 'set.*')) - trans_comm_data += glob.glob(os.path.join(ii, 'type*.raw')) - trans_comm_data += glob.glob(os.path.join(ii, 'nopbc')) + sys_paths = expand_sys_str(ii) + for single_sys in sys_paths: + trans_comm_data += glob.glob(os.path.join(single_sys, 'set.*')) + trans_comm_data += glob.glob(os.path.join(single_sys, 'type*.raw')) + trans_comm_data += glob.glob(os.path.join(single_sys, 'nopbc')) for ii in fp_data : - if jdata.get('use_clusters', False): - for single_sys in os.listdir(os.path.join(ii)): - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'set.*')) - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'type*.raw')) - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'nopbc')) - else: - trans_comm_data += glob.glob(os.path.join(ii, 'set.*')) - trans_comm_data += glob.glob(os.path.join(ii, 'type*.raw')) - trans_comm_data += glob.glob(os.path.join(ii, 'nopbc')) + sys_paths = expand_sys_str(ii) + for single_sys in sys_paths: + trans_comm_data += glob.glob(os.path.join(single_sys, 'set.*')) + trans_comm_data += glob.glob(os.path.join(single_sys, 'type*.raw')) + trans_comm_data += glob.glob(os.path.join(single_sys, 'nopbc')) os.chdir(cwd) try: diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 982db3114..529401519 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -9,6 +9,7 @@ 02: fp (optional, if the original dataset do not have fp data, same as generator) """ import logging +import warnings import queue import os import json @@ -21,7 +22,7 @@ from dpgen import dlog from dpgen import SHORT_CMD -from dpgen.util import sepline +from dpgen.util import sepline, expand_sys_str from distutils.version import LooseVersion from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks, make_dispatcher, make_submission from dpgen.generator.run import make_train, run_train, post_train, run_fp, post_fp, fp_name, model_devi_name, 
train_name, train_task_fmt, sys_link_fp_vasp_pp, make_fp_vasp_incar, make_fp_vasp_kp, make_fp_vasp_cp_cvasp, data_system_fmt, model_devi_task_fmt, fp_task_fmt @@ -38,17 +39,6 @@ sys_name_fmt = 'sys.' + data_system_fmt sys_name_pattern = 'sys.[0-9]*[0-9]' -def expand_sys_str(root_dir): - matches = [] - for root, dirnames, filenames in os.walk(root_dir, followlinks=True): - for filename in fnmatch.filter(filenames, 'type.raw'): - matches.append(root) - matches.sort() - dirnames = [os.path.basename(ii) for ii in matches] - if (len(list(set(dirnames))) != len(matches)) : - raise RuntimeError('duplicated system name: it is highly recommend to place all systems in the same level of directory and has different names') - return matches - def get_system_cls(jdata): if jdata.get("labeled", False): @@ -58,28 +48,12 @@ def get_system_cls(jdata): def get_multi_system(path, jdata): system = get_system_cls(jdata) + system_paths = expand_sys_str(path) systems = dpdata.MultiSystems( - *[system(os.path.join(path, s), fmt='deepmd/npy') for s in os.listdir(path)]) - return systems - - -def get_systems(path, jdata): - system_cls = get_system_cls(jdata) - system_paths = expand_sys_str(path) - systems = {} - for ii in system_paths: - systems[os.path.basename(ii)] = system_cls(ii, fmt='deepmd/npy') + *[system(s, fmt='deepmd/npy') for s in system_paths]) return systems -def get_system_idx(path): - system_paths = expand_sys_str(path) - sys_idx_map = {} - for idx,ii in enumerate(system_paths): - sys_idx_map[os.path.basename(ii)] = idx - return sys_idx_map - - def init_model(iter_index, jdata, mdata): training_init_model = jdata.get('training_init_model', False) if not training_init_model: @@ -111,20 +85,13 @@ def init_pick(iter_index, jdata, mdata): """pick up init data from dataset randomly""" pick_data = jdata['pick_data'] init_pick_number = jdata['init_pick_number'] - use_clusters = jdata.get('use_clusters', False) # use MultiSystems with System # TODO: support System and LabeledSystem # TODO: support other format - if use_clusters: - systems = get_multi_system(pick_data, jdata) - else: - systems = get_systems(pick_data, jdata) + systems = get_multi_system(pick_data, jdata) # label the system labels = [] - if use_clusters: - items = systems.systems.items() - else: - items = systems.items() + items = systems.systems.items() for key, system in items: labels.extend([(key, j) for j in range(len(system))]) @@ -146,48 +113,18 @@ def init_pick(iter_index, jdata, mdata): _init_dump_selected_frames(systems, labels, rest_idx, sys_data_path, jdata) -def _add_system(systems, key, system): - if key in systems.keys(): - systems[key].append(system) - else: - systems[key] = system - return systems - - def _init_dump_selected_frames(systems, labels, selc_idx, sys_data_path, jdata): - pick_data = jdata['pick_data'] - use_clusters = jdata.get('use_clusters', False) - if use_clusters: - selc_systems = dpdata.MultiSystems() - for j in selc_idx: - sys_name, sys_id = labels[j] - selc_systems.append(systems[sys_name][sys_id]) - selc_systems.to_deepmd_raw(sys_data_path) - selc_systems.to_deepmd_npy(sys_data_path, set_size=selc_idx.size) - else: - selc_systems = {} - for j in selc_idx: - sys_name, sys_id = labels[j] - selc_systems = _add_system(selc_systems, sys_name, systems[sys_name][sys_id]) - sys_idx_map = get_system_idx(pick_data) - for kk in selc_systems.keys(): - sub_path = os.path.join(sys_data_path, sys_name_fmt % sys_idx_map[kk]) - selc_systems[kk].to_deepmd_raw(sub_path) - selc_systems[kk].to_deepmd_npy(sub_path, 
set_size=selc_idx.size) - with open(os.path.join(sys_data_path, 'sys_idx_map.json'), 'w') as fp: - json.dump(sys_idx_map, fp, indent=4) - -def _dump_system_dict(systems, path): - for kk in systems: - sub_path = os.path.join(path, sys_name_fmt % (int(kk))) - systems[kk].to_deepmd_raw(sub_path) - systems[kk].to_deepmd_npy(sub_path, set_size=systems[kk].get_nframes()) + selc_systems = dpdata.MultiSystems() + for j in selc_idx: + sys_name, sys_id = labels[j] + selc_systems.append(systems[sys_name][sys_id]) + selc_systems.to_deepmd_raw(sys_data_path) + selc_systems.to_deepmd_npy(sys_data_path, set_size=selc_idx.size) def make_model_devi(iter_index, jdata, mdata): """calculate the model deviation of the rest idx""" pick_data = jdata['pick_data'] - use_clusters = jdata.get('use_clusters', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, model_devi_name) create_path(work_path) @@ -203,25 +140,7 @@ def make_model_devi(iter_index, jdata, mdata): rest_data_path = os.path.join(last_iter_name, model_devi_name, rest_data_name) if not os.path.exists(rest_data_path): return False - if use_clusters: - for jj, subsystem in enumerate(os.listdir(rest_data_path)): - task_name = "task." + model_devi_task_fmt % (0, jj) - task_path = os.path.join(work_path, task_name) - create_path(task_path) - os.symlink(os.path.abspath(os.path.join(rest_data_path, subsystem)), - os.path.abspath(os.path.join(task_path, rest_data_name))) - else: - rest_data_path = os.path.abspath(rest_data_path) - sys_path = glob.glob(os.path.join(rest_data_path, sys_name_pattern)) - cwd = os.getcwd() - for ii in sys_path: - task_name = "task." + model_devi_task_fmt % (int(os.path.basename(ii).split('.')[1]), 0) - task_path = os.path.join(work_path, task_name) - create_path(task_path) - os.chdir(task_path) - os.symlink(os.path.relpath(ii), rest_data_name) - os.chdir(cwd) - os.chdir(cwd) + os.symlink(os.path.abspath(rest_data_path), os.path.join(work_path, rest_data_name + ".old")) return True @@ -231,43 +150,28 @@ def run_model_devi(iter_index, jdata, mdata): work_path = os.path.join(iter_name, model_devi_name) # generate command commands = [] - tasks = glob.glob(os.path.join(work_path, "task.*")) - run_tasks = [os.path.basename(ii) for ii in tasks] + run_tasks = ["."] # get models models = glob.glob(os.path.join(work_path, "graph*pb")) model_names = [os.path.basename(ii) for ii in models] task_model_list = [] for ii in model_names: - task_model_list.append(os.path.join('..', ii)) - # get max data size - data_size = max([len(dpdata.System(os.path.join( - task, rest_data_name), fmt="deepmd/npy")) for task in tasks]) + task_model_list.append(os.path.join('.', ii)) # models commands = [] - detail_file_names = [] - for ii, mm in enumerate(task_model_list): - detail_file_name = "{prefix}-{ii}".format( - prefix=detail_file_name_prefix, - ii=ii, - ) - # TODO: support 0.x? 
- command = "{python} -m deepmd test -m {model} -s {system} -n {numb_test} -d {detail_file}".format( - python=mdata['python_test_path'], - model=mm, - system=rest_data_name, - numb_test=data_size, - detail_file=detail_file_name, - ) - commands.append(command) - detail_file_names.append(detail_file_name) + detail_file_name = detail_file_name_prefix + command = "{dp} model-devi -m {model} -s {system} -o {detail_file}".format( + dp=mdata.get('model_devi_command', 'dp'), + model=" ".join(task_model_list), + system=rest_data_name + ".old", + detail_file=detail_file_name, + ) + commands = [command] # submit - try: - model_devi_group_size = mdata['model_devi_group_size'] - except Exception: - model_devi_group_size = 1 + model_devi_group_size = mdata.get('model_devi_group_size', 1) - forward_files = [rest_data_name] - backward_files = sum([[pf+".e.out", pf+".f.out", pf+".v.out"] for pf in detail_file_names], []) + forward_files = [rest_data_name + ".old"] + backward_files = [detail_file_name] api_version = mdata.get('api_version', '0.9') if LooseVersion(api_version) < LooseVersion('1.0'): @@ -303,102 +207,50 @@ def run_model_devi(iter_index, jdata, mdata): def post_model_devi(iter_index, jdata, mdata): """calculate the model deviation""" - use_clusters = jdata.get('use_clusters', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, model_devi_name) - tasks = glob.glob(os.path.join(work_path, "task.*")) - tasks.sort() - - e_trust_lo = jdata['e_trust_lo'] - e_trust_hi = jdata['e_trust_hi'] - f_trust_lo = jdata['f_trust_lo'] - f_trust_hi = jdata['f_trust_hi'] - - if use_clusters: - sys_accurate = dpdata.MultiSystems() - sys_candinate = dpdata.MultiSystems() - sys_failed = dpdata.MultiSystems() - else: - sys_accurate = {} - sys_candinate = {} - sys_failed = {} - all_names = set() - - for task in tasks: - if not use_clusters: - sys_name = os.path.basename(task).split('.')[1] - all_names.add(sys_name) - # e.out - details_e = glob.glob(os.path.join(task, "{}-*.e.out".format(detail_file_name_prefix))) - e_all = np.array([np.loadtxt(detail_e, ndmin=2)[:, 1] for detail_e in details_e]) - e_std = np.std(e_all, axis=0) - n_frame = e_std.size - - # f.out - details_f = glob.glob(os.path.join(task, "{}-*.f.out".format(detail_file_name_prefix))) - f_all = np.array([np.loadtxt(detail_f, ndmin=2)[:, 3:6].reshape((n_frame, -1, 3)) for detail_f in details_f]) - # (n_model, n_frame, n_atom, 3) - f_std = np.std(f_all, axis=0) - # (n_frame, n_atom, 3) - f_std = np.linalg.norm(f_std, axis=2) - # (n_frame, n_atom) - f_std = np.max(f_std, axis=1) - # (n_frame,) - - system_cls = get_system_cls(jdata) - for subsys, e_devi, f_devi in zip(system_cls(os.path.join(task, rest_data_name), fmt='deepmd/npy'), e_std, f_std): - if (e_devi < e_trust_hi and e_devi >= e_trust_lo) or (f_devi < f_trust_hi and f_devi >= f_trust_lo) : - if use_clusters: + + f_trust_lo = jdata['model_devi_f_trust_lo'] + f_trust_hi = jdata['model_devi_f_trust_hi'] + + sys_accurate = dpdata.MultiSystems() + sys_candinate = dpdata.MultiSystems() + sys_failed = dpdata.MultiSystems() + + sys_entire = dpdata.MultiSystems().from_deepmd_npy(os.path.join(work_path, rest_data_name + ".old")) + + detail_file_name = detail_file_name_prefix + with open(os.path.join(work_path, detail_file_name)) as f: + for line in f: + if line.startswith("# data.rest.old"): + name = (line.split()[1]).split("/")[-1] + elif line.startswith("#"): + pass + else: + idx = int(line.split()[0]) + f_devi = float(line.split()[4]) + subsys = sys_entire[name][idx] + if 
f_trust_lo <= f_devi < f_trust_hi: sys_candinate.append(subsys) - else: - sys_candinate = _add_system(sys_candinate, sys_name, subsys) - elif (e_devi >= e_trust_hi ) or (f_devi >= f_trust_hi ): - if use_clusters: + elif f_devi >= f_trust_hi: sys_failed.append(subsys) - else: - sys_failed = _add_system(sys_failed, sys_name, subsys) - elif (e_devi < e_trust_lo and f_devi < f_trust_lo ): - if use_clusters: + elif f_devi < f_trust_lo: sys_accurate.append(subsys) else: - sys_accurate = _add_system(sys_accurate, sys_name, subsys) - else: - raise RuntimeError('reach a place that should NOT be reached...') - if use_clusters: - counter = {"candidate": sys_candinate.get_nframes(), "accurate": sys_accurate.get_nframes(), "failed": sys_failed.get_nframes()} - fp_sum = sum(counter.values()) - for cc_key, cc_value in counter.items(): - dlog.info("{0:9s} : {1:6d} in {2:6d} {3:6.2f} %".format(cc_key, cc_value, fp_sum, cc_value/fp_sum*100)) - else: - all_names = list(all_names) - all_names.sort() - counter = {"candidate": 0, "accurate": 0, "failed": 0} - for kk in all_names: - sys_counter = {"candidate": 0, "accurate": 0, "failed": 0} - if kk in sys_candinate.keys(): - sys_counter['candidate'] += sys_candinate[kk].get_nframes() - if kk in sys_accurate.keys(): - sys_counter['accurate'] += sys_accurate[kk].get_nframes() - if kk in sys_failed.keys(): - sys_counter['failed'] += sys_failed[kk].get_nframes() - fp_sum = sum(sys_counter.values()) - for cc_key, cc_value in sys_counter.items(): - if fp_sum != 0: - dlog.info("sys{0:s} {1:9s} : {2:6d} in {3:6d} {4:6.2f} %".format(kk, cc_key, cc_value, fp_sum, cc_value/fp_sum*100)) - else: - dlog.info("sys{0:s} {1:9s} : {2:6d} in {3:6d} {4:6.2f} %".format(kk, cc_key, cc_value, fp_sum, 0*100)) - for ii in ['candidate', 'accurate', 'failed']: - counter[ii] += sys_counter[ii] + raise RuntimeError('reach a place that should NOT be reached...') + + counter = {"candidate": sys_candinate.get_nframes(), "accurate": sys_accurate.get_nframes(), "failed": sys_failed.get_nframes()} + fp_sum = sum(counter.values()) + for cc_key, cc_value in counter.items(): + dlog.info("{0:9s} : {1:6d} in {2:6d} {3:6.2f} %".format(cc_key, cc_value, fp_sum, cc_value/fp_sum*100)) if counter['candidate'] == 0 and counter['failed'] > 0: raise RuntimeError('no candidate but still have failed cases, stop. 
You may want to refine the training or to increase the trust level hi') # label the candidate system labels = [] - if use_clusters: - items = sys_candinate.systems.items() - else: - items = sys_candinate.items() + items = sys_candinate.systems.items() + for key, system in items: labels.extend([(key, j) for j in range(len(system))]) # candinate: pick up randomly @@ -412,112 +264,61 @@ def post_model_devi(iter_index, jdata, mdata): (counter['candidate'], len(pick_idx), float(len(pick_idx))/counter['candidate']*100., len(rest_idx), float(len(rest_idx))/counter['candidate']*100.)) # dump the picked candinate data - if use_clusters: - picked_systems = dpdata.MultiSystems() - for j in pick_idx: - sys_name, sys_id = labels[j] - picked_systems.append(sys_candinate[sys_name][sys_id]) - sys_data_path = os.path.join(work_path, picked_data_name) - picked_systems.to_deepmd_raw(sys_data_path) - picked_systems.to_deepmd_npy(sys_data_path, set_size=iter_pick_number) - else: - selc_systems = {} - for j in pick_idx: - sys_name, sys_id = labels[j] - selc_systems = _add_system(selc_systems, sys_name, sys_candinate[sys_name][sys_id]) - sys_data_path = os.path.join(work_path, picked_data_name) - _dump_system_dict(selc_systems, sys_data_path) + picked_systems = dpdata.MultiSystems() + for j in pick_idx: + sys_name, sys_id = labels[j] + picked_systems.append(sys_candinate[sys_name][sys_id]) + sys_data_path = os.path.join(work_path, picked_data_name) + picked_systems.to_deepmd_raw(sys_data_path) + picked_systems.to_deepmd_npy(sys_data_path, set_size=iter_pick_number) + # dump the rest data (not picked candinate data and failed data) - if use_clusters: - rest_systems = dpdata.MultiSystems() - for j in rest_idx: - sys_name, sys_id = labels[j] - rest_systems.append(sys_candinate[sys_name][sys_id]) - rest_systems += sys_failed - sys_data_path = os.path.join(work_path, rest_data_name) - rest_systems.to_deepmd_raw(sys_data_path) + rest_systems = dpdata.MultiSystems() + for j in rest_idx: + sys_name, sys_id = labels[j] + rest_systems.append(sys_candinate[sys_name][sys_id]) + rest_systems += sys_failed + sys_data_path = os.path.join(work_path, rest_data_name) + rest_systems.to_deepmd_raw(sys_data_path) + if rest_idx.size: rest_systems.to_deepmd_npy(sys_data_path, set_size=rest_idx.size) - else: - selc_systems = {} - for j in rest_idx: - sys_name, sys_id = labels[j] - selc_systems = _add_system(selc_systems, sys_name, sys_candinate[sys_name][sys_id]) - for kk in sys_failed.keys(): - selc_systems = _add_system(selc_systems, kk, sys_failed[kk]) - sys_data_path = os.path.join(work_path, rest_data_name) - _dump_system_dict(selc_systems, sys_data_path) + # dump the accurate data -- to another directory - if use_clusters: - sys_data_path = os.path.join(work_path, accurate_data_name) - sys_accurate.to_deepmd_raw(sys_data_path) - sys_accurate.to_deepmd_npy(sys_data_path, set_size=sys_accurate.get_nframes()) - else: - sys_data_path = os.path.join(work_path, accurate_data_name) - _dump_system_dict(sys_accurate, sys_data_path) + sys_data_path = os.path.join(work_path, accurate_data_name) + sys_accurate.to_deepmd_raw(sys_data_path) + sys_accurate.to_deepmd_npy(sys_data_path, set_size=sys_accurate.get_nframes()) def make_fp_labeled(iter_index, jdata): dlog.info("already labeled, skip make_fp and link data directly") pick_data = jdata['pick_data'] - use_clusters = jdata.get('use_clusters', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, fp_name) create_path(work_path) picked_data_path = 
os.path.join(iter_name, model_devi_name, picked_data_name) - if use_clusters: - os.symlink(os.path.abspath(picked_data_path), os.path.abspath( - os.path.join(work_path, "task." + data_system_fmt % 0))) - os.symlink(os.path.abspath(picked_data_path), os.path.abspath( - os.path.join(work_path, "data." + data_system_fmt % 0))) - else: - picked_data_path = os.path.abspath(picked_data_path) - sys_path = glob.glob(os.path.join(picked_data_path, sys_name_pattern)) - cwd = os.getcwd() - os.chdir(work_path) - for ii in sys_path: - sys_idx = os.path.basename(ii).split('.')[1] - data_dir = 'data.' + data_system_fmt % int(sys_idx) - task_dir = 'task.' + data_system_fmt % int(sys_idx) - os.symlink(os.path.relpath(ii), data_dir) - os.symlink(os.path.relpath(ii), task_dir) - os.chdir(cwd) + os.symlink(os.path.abspath(picked_data_path), os.path.abspath( + os.path.join(work_path, "task." + data_system_fmt % 0))) + os.symlink(os.path.abspath(picked_data_path), os.path.abspath( + os.path.join(work_path, "data." + data_system_fmt % 0))) def make_fp_configs(iter_index, jdata): pick_data = jdata['pick_data'] - use_clusters = jdata.get('use_clusters', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, fp_name) create_path(work_path) picked_data_path = os.path.join(iter_name, model_devi_name, picked_data_name) - if use_clusters: - systems = get_multi_system(picked_data_path, jdata) - jj = 0 - for system in systems: - for subsys in system: - task_name = "task." + fp_task_fmt % (0, jj) - task_path = os.path.join(work_path, task_name) - create_path(task_path) - subsys.to('vasp/poscar', os.path.join(task_path, 'POSCAR')) - jj += 1 - else: - picked_data_path = os.path.abspath(picked_data_path) - sys_path = glob.glob(os.path.join(picked_data_path, sys_name_pattern)) - for ii in sys_path: - tmp_sys = dpdata.System(ii, fmt = 'deepmd/npy') - sys_idx = os.path.basename(ii).split('.')[1] - jj = 0 - for ss in tmp_sys: - task_name = "task." + fp_task_fmt % (int(sys_idx), jj) - task_path = os.path.join(work_path, task_name) - create_path(task_path) - ss.to('vasp/poscar', os.path.join(task_path, 'POSCAR')) - job = {} - with open(os.path.join(task_path, 'job.json'), 'w') as fp: - json.dump(job, fp, indent=4) - jj += 1 + systems = get_multi_system(picked_data_path, jdata) + jj = 0 + for system in systems: + for subsys in system: + task_name = "task." + fp_task_fmt % (0, jj) + task_path = os.path.join(work_path, task_name) + create_path(task_path) + subsys.to('vasp/poscar', os.path.join(task_path, 'POSCAR')) + jj += 1 def make_fp_gaussian(iter_index, jdata): diff --git a/dpgen/util.py b/dpgen/util.py index aa805e7e5..9491cdc30 100644 --- a/dpgen/util.py +++ b/dpgen/util.py @@ -1,5 +1,7 @@ #!/usr/bin/env python # coding: utf-8 +from typing import Union, List +from pathlib import Path from dpgen import dlog @@ -25,3 +27,23 @@ def box_center(ch='',fill=' ',sp="|"): ''' strs=ch.center(Len,fill) dlog.info(sp+strs[1:len(strs)-1:]+sp) + + +def expand_sys_str(root_dir: Union[str, Path]) -> List[str]: + """Recursively iterate over directories taking those that contain `type.raw` file. 
+ + Parameters + ---------- + root_dir : Union[str, Path] + starting directory + + Returns + ------- + List[str] + list of string pointing to system directories + """ + root_dir = Path(root_dir) + matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()] + if (root_dir / "type.raw").is_file(): + matches.append(str(root_dir)) + return matches From c25cea3aac2e84f86f481afb5663d6bc87252bbc Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 9 Jun 2022 00:53:16 -0400 Subject: [PATCH 02/26] supports non-list mdata (#748) * supports non-list mdata The mdata of a task is a list of a single dict. This "list" looks useless and not clear enough. So this commit supports using the dict without a list. Note that old parameters are still supported, so no breaking changes are made. The "list" is just removed from all examples. Both list and non-list parameters are in the unittest. * fix typo --- README.md | 26 +++++++------------ doc/run/example-of-machine.md | 11 +++----- dpgen/remote/decide_machine.py | 13 +++++++--- .../machine/DeePMD-kit-1.x/machine-ali.json | 13 ++++------ .../machine-lsf-slurm-cp2k.json | 13 ++++------ .../DeePMD-kit-1.x/machine-slurm-qe.json | 12 +++------ .../DeePMD-kit-2.x/lebesgue_v2_machine.json | 11 ++++---- tests/tools/machine_fp_single2.json | 14 ++++++++++ tests/tools/test_convert_mdata.py | 10 +++++-- 9 files changed, 65 insertions(+), 58 deletions(-) create mode 100644 tests/tools/machine_fp_single2.json diff --git a/README.md b/README.md index c833ed059..fb280e226 100644 --- a/README.md +++ b/README.md @@ -1137,7 +1137,7 @@ an example of new dpgen's machine.json ```json { "api_version": "1.0", - "train": [ + "train": { "command": "dp", "machine": { @@ -1161,9 +1161,8 @@ an example of new dpgen's machine.json "para_deg": 3, "source_list": ["/home/user1234/deepmd.1.2.4.env"] } - } - ], - "model_devi":[ + }, + "model_devi": { "command": "lmp", "machine":{ @@ -1184,9 +1183,8 @@ an example of new dpgen's machine.json "group_size": 5, "source_list": ["/home/user1234/deepmd.1.2.4.env"] } - } - ], - "fp":[ + }, + "fp": { "command": "vasp_std", "machine":{ @@ -1208,7 +1206,6 @@ an example of new dpgen's machine.json "source_list": ["~/vasp.env"] } } - ] } ``` note1: the key "local_root" in dpgen's machine.json is always `./` @@ -1220,7 +1217,7 @@ When switching into a new machine, you may modifying the `MACHINE`, according to An example for `MACHINE` is: ```json { - "train": [ + "train": { "machine": { "batch": "slurm", @@ -1243,9 +1240,8 @@ An example for `MACHINE` is: "qos": "data" }, "command": "USERPATH/dp" - } - ], - "model_devi": [ + }, + "model_devi": { "machine": { "batch": "slurm", @@ -1269,9 +1265,8 @@ An example for `MACHINE` is: }, "command": "lmp_serial", "group_size": 1 - } - ], - "fp": [ + }, + "fp": { "machine": { "batch": "slurm", @@ -1298,7 +1293,6 @@ An example for `MACHINE` is: "command": "vasp_gpu", "group_size": 1 } - ] } ``` Following table illustrates which key is needed for three types of machine: `train`,`model_devi` and `fp`. Each of them is a list of dicts. Each dict can be considered as an independent environmnet for calculation. diff --git a/doc/run/example-of-machine.md b/doc/run/example-of-machine.md index 569f85026..247c50e4f 100644 --- a/doc/run/example-of-machine.md +++ b/doc/run/example-of-machine.md @@ -20,7 +20,7 @@ In this section, we will show you how to perform train task at a local workstati In this example, we perform the `train` task on a local workstation. 
```json -"train": [ +"train": { "command": "dp", "machine": { @@ -36,8 +36,7 @@ In this example, we perform the `train` task on a local workstation. "group_size": 1, "source_list": ["/home/user1234/deepmd.env"] } - } - ], + }, ``` The "command" for the train task in the DeePMD-kit is "dp". @@ -51,7 +50,7 @@ In the resources parameter, "number_node", "cpu_per_node", and "gpu_per_node" sp In this example, we perform the model_devi task at a local Slurm workstation. ```json -"model_devi": [ +"model_devi": { "command": "lmp", "machine": { @@ -70,7 +69,6 @@ In this example, we perform the model_devi task at a local Slurm workstation. "source_list": ["/home/user1234/lammps.env"] } } -], ``` The "command" for the model_devi task in the LAMMPS is "lmp". @@ -84,7 +82,7 @@ In the resources parameter, we specify the name of the queue to which the task i In this example, we perform the fp task at a remote PBS cluster that can be accessed via SSH. ```json -"fp": [ +"fp": { "command": "mpirun -n 32 vasp_std", "machine": { @@ -106,7 +104,6 @@ In this example, we perform the fp task at a remote PBS cluster that can be acce "source_list": ["/home/user1234/vasp.env"] } } -], ``` VASP code is used for fp task and mpi is used for parallel computing, so "mpirun -n 32" is added to specify the number of parallel threads. diff --git a/dpgen/remote/decide_machine.py b/dpgen/remote/decide_machine.py index 31691f322..c551be44b 100644 --- a/dpgen/remote/decide_machine.py +++ b/dpgen/remote/decide_machine.py @@ -36,11 +36,18 @@ def convert_mdata(mdata, task_types=["train", "model_devi", "fp"]): ''' for task_type in task_types: if task_type in mdata: - for key, item in mdata[task_type][0].items(): + if isinstance(mdata[task_type], dict): + task_data = mdata[task_type] + elif isinstance(mdata[task_type], (list, tuple)): + task_data = mdata[task_type][0] + else: + raise TypeError("mdata/%s should be dict or list!" 
% task_type) + for key, item in task_data.items(): if "comments" not in key: mdata[task_type + "_" + key] = item - group_size = mdata[task_type][0]["resources"].get("group_size", 1) - if group_size == 1: group_size = mdata[task_type][0].get("group_size", 1) + group_size = task_data["resources"].get("group_size", 1) + if group_size == 1: + group_size = task_data.get("group_size", 1) mdata[task_type + "_" + "group_size"] = group_size return mdata diff --git a/examples/machine/DeePMD-kit-1.x/machine-ali.json b/examples/machine/DeePMD-kit-1.x/machine-ali.json index a2a338af4..e78fc9dd4 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-ali.json +++ b/examples/machine/DeePMD-kit-1.x/machine-ali.json @@ -1,5 +1,5 @@ { - "train": [ + "train": { "machine": { "batch": "shell", @@ -34,10 +34,9 @@ }, "command": "/root/deepmd-kit/bin/dp", "group_size": 2 - } - ], + }, - "model_devi": [ + "model_devi": { "machine": { "batch": "shell", @@ -71,10 +70,9 @@ }, "command": "/root/deepmd-kit/bin/lmp", "group_size": 2 - } - ], + }, - "fp": [ + "fp": { "machine": { "batch": "shell", @@ -108,7 +106,6 @@ "command": "mpirun -n 16 /root/deepmd-pkg/vasp.5.4.4/bin/vasp_std", "group_size": 1 } - ] } diff --git a/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json b/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json index b56d022ec..4fb5845ee 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json +++ b/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json @@ -1,5 +1,5 @@ { - "train": [ + "train": { "machine": { "machine_type": "slurm", @@ -25,9 +25,8 @@ "submit_wait_time": 60 }, "python_path": "/share/apps/deepmd/compress/bin/python3.8" - } - ], - "model_devi": [ + }, + "model_devi": { "machine": { "machine_type": "slurm", @@ -54,9 +53,8 @@ }, "command": "lmp_mpi", "group_size": 5 - } - ], - "fp": [ + }, + "fp": { "machine": { "machine_type": "lsf", @@ -87,5 +85,4 @@ "command": "cp2k.popt -i input.inp", "group_size": 50 } - ] } diff --git a/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json b/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json index 22a3fdbbd..5f15303d6 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json +++ b/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json @@ -1,7 +1,7 @@ { "_comment" : "This is an example of DP-GEN on Slurm", "_comment" : "Last updated on 2021.4.30 for DP-GEN 0.9.2 by Yuzhi Zhang", - "train" :[ + "train" : { "_comment" : "Specify the installed path of DeePMD-kit", "_comment" : "The version of DeePMD-kit should be 1.*", @@ -49,10 +49,9 @@ "time_limit": "23:0:0", "_comment": "that's all" } - } - ], + }, - "model_devi": [ + "model_devi": { "machine": { "machine_type": "slurm", @@ -81,10 +80,8 @@ "command": "lmp_serial", "_comment" : "DP-GEN will put 5 tasks together in one submitting script.", "group_size": 5 - } - ], + }, "fp": - [ { "machine": { "machine_type": "slurm", @@ -113,5 +110,4 @@ "command": "mpirun -n 8 /gpfs/share/home/1600017784/yuzhi/soft/QE-mpi/PW/src/pw.x < input", "group_size": 1 } - ] } diff --git a/examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json b/examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json index 6b9ead467..0ecba4fa6 100644 --- a/examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json +++ b/examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json @@ -1,7 +1,7 @@ { "api_version": "1.0", "deepmd_version": "2.0.1", - "train" :[ + "train" : { "command": "dp", "machine": { @@ -34,9 +34,9 @@ "queue_name": "GPU", "group_size": 1 } - }], + }, "model_devi": - [{ + { "command": "lmp -i 
input.lammps -v restart 0", "machine": { "batch_type": "DpCloudServer", @@ -68,9 +68,9 @@ "queue_name": "GPU", "group_size": 5 } - }], + }, "fp": - [{ + { "command": "mpirun -n 16 vasp_std", "machine": { "batch_type": "DpCloudServer", @@ -104,5 +104,4 @@ "source_list": ["/opt/intel/oneapi/setvars.sh"] } } - ] } diff --git a/tests/tools/machine_fp_single2.json b/tests/tools/machine_fp_single2.json new file mode 100644 index 000000000..8c2212927 --- /dev/null +++ b/tests/tools/machine_fp_single2.json @@ -0,0 +1,14 @@ +{ + "fp": + { + "command": "vasp_std", + "machine":{ + "batch_type": "PBS" + }, + "resources": { + "group_size" : 8 + }, + "_comments" : "In user_forward_files, define input files to be uploaded.", + "user_forward_files" : ["vdw_kernel.bindat"] + } +} \ No newline at end of file diff --git a/tests/tools/test_convert_mdata.py b/tests/tools/test_convert_mdata.py index 5458b0faa..5dc1b944e 100644 --- a/tests/tools/test_convert_mdata.py +++ b/tests/tools/test_convert_mdata.py @@ -6,12 +6,18 @@ __package__ = 'tools' from dpgen.remote.decide_machine import convert_mdata from .context import setUpModule -machine_file = 'machine_fp_single.json' + class TestConvertMdata(unittest.TestCase): + machine_file = 'machine_fp_single.json' + def test_convert_mdata (self): - mdata = json.load(open(machine_file)) + mdata = json.load(open(self.machine_file)) mdata = convert_mdata(mdata, ["fp"]) self.assertEqual(mdata["fp_command"], "vasp_std") self.assertEqual(mdata["fp_group_size"], 8) self.assertEqual(mdata["fp_machine"]["batch_type"], "PBS") self.assertEqual(mdata["fp_user_forward_files"], ["vdw_kernel.bindat"]) + + +class TestConvertMdata2(TestConvertMdata): + machine_file = 'machine_fp_single2.json' From 828024b4696b2e06dfa72d2694aae541f2003e3e Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 9 Jun 2022 00:53:45 -0400 Subject: [PATCH 03/26] upgrade all tasks to dpdispatcher (#749) * upgrade all tasks to dpdispatcher This commit upgrades init_reaction and init_surf to use dpdispatcher * fix method args * fix typo * change the variable name from `work_dir` to `work_path` --- dpgen/data/reaction.py | 38 ++++++++-------- dpgen/data/surf.py | 11 ++--- dpgen/dispatcher/Dispatcher.py | 79 ++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 22 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index b9574d525..0abfeb965 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -7,7 +7,7 @@ output: data """ -import argparse +import warnings import glob import json import os @@ -15,7 +15,8 @@ import dpdata from dpgen import dlog -from dpgen.dispatcher.Dispatcher import make_dispatcher +from dpgen.dispatcher.Dispatcher import make_submission_compat +from dpgen.remote.decide_machine import convert_mdata from dpgen.generator.run import create_path, make_fp_task_name from dpgen.util import sepline @@ -73,14 +74,15 @@ def make_lmp(jdata): return lmp_string -def run_reaxff(jdata, mdata, dispatcher, log_file="reaxff_log"): +def run_reaxff(jdata, mdata, log_file="reaxff_log"): work_path = reaxff_path reaxff_command = "{} -in {}".format(mdata["reaxff_command"], lmp_path) run_tasks = glob.glob(os.path.join(work_path, 'task.*')) run_tasks.sort() run_tasks = [os.path.basename(ii) for ii in run_tasks] - dispatcher.run_jobs(mdata['reaxff_resources'], + make_submission_compat(mdata['reaxff_machine'], + mdata['reaxff_resources'], [reaxff_command], work_path, run_tasks, @@ -89,7 +91,8 @@ def run_reaxff(jdata, mdata, dispatcher, log_file="reaxff_log"): [ff_path, 
data_init_path, control_path, lmp_path], [trj_path], outlog=log_file, - errlog=log_file) + errlog=log_file, + api_version=mdata.get("api_version", "0.9")) def link_trj(jdata): @@ -102,7 +105,7 @@ def link_trj(jdata): os.path.join(task_path, trj_path))) -def run_build_dataset(jdata, mdata, dispatcher, log_file="build_log"): +def run_build_dataset(jdata, mdata, log_file="build_log"): work_path = build_path build_command = "{cmd} -n {dataset_name} -a {type_map} -d {lammpstrj} -c {cutoff} -s {dataset_size} -k \"{qmkeywords}\" --nprocjob {nprocjob} --nproc {nproc}".format( cmd=mdata["build_command"], @@ -119,7 +122,8 @@ def run_build_dataset(jdata, mdata, dispatcher, log_file="build_log"): run_tasks.sort() run_tasks = [os.path.basename(ii) for ii in run_tasks] - dispatcher.run_jobs(mdata['build_resources'], + make_submission_compat(mdata['build_machine'], + mdata['build_resources'], [build_command], work_path, run_tasks, @@ -128,7 +132,8 @@ def run_build_dataset(jdata, mdata, dispatcher, log_file="build_log"): [trj_path], [f"dataset_{dataset_name}_gjf"], outlog=log_file, - errlog=log_file) + errlog=log_file, + api_version=mdata.get("api_version", "0.9")) def link_fp_input(): @@ -146,7 +151,6 @@ def link_fp_input(): def run_fp(jdata, mdata, - dispatcher, log_file="output", forward_common_files=[]): fp_command = mdata['fp_command'] @@ -162,7 +166,8 @@ def run_fp(jdata, run_tasks = [os.path.basename(ii) for ii in fp_run_tasks] - dispatcher.run_jobs(mdata['fp_resources'], + make_submission_compat(mdata['fp_machine'], + mdata['fp_resources'], [fp_command], work_path, run_tasks, @@ -171,7 +176,8 @@ def run_fp(jdata, ["input"], [log_file], outlog=log_file, - errlog=log_file) + errlog=log_file, + api_version=mdata.get("api_version", "0.9")) def convert_data(jdata): @@ -198,6 +204,7 @@ def gen_init_reaction(args): with open(args.MACHINE, "r") as fp: mdata = json.load(fp) + mdata = convert_mdata(mdata, ["reaxff", "build", "fp"]) record = "record.reaction" iter_rec = -1 numb_task = 7 @@ -213,18 +220,15 @@ def gen_init_reaction(args): elif ii == 0: link_reaxff(jdata) elif ii == 1: - dispatcher = make_dispatcher(mdata["reaxff_machine"]) - run_reaxff(jdata, mdata, dispatcher) + run_reaxff(jdata, mdata) elif ii == 2: link_trj(jdata) elif ii == 3: - dispatcher = make_dispatcher(mdata["build_machine"]) - run_build_dataset(jdata, mdata, dispatcher) + run_build_dataset(jdata, mdata) elif ii == 4: link_fp_input() elif ii == 5: - dispatcher = make_dispatcher(mdata["fp_machine"]) - run_fp(jdata, mdata, dispatcher) + run_fp(jdata, mdata) elif ii == 6: convert_data(jdata) with open(record, "a") as frec: diff --git a/dpgen/data/surf.py b/dpgen/data/surf.py index bc31b6705..543f02bc8 100644 --- a/dpgen/data/surf.py +++ b/dpgen/data/surf.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import time +import warnings import os,json,shutil,re,glob,argparse import numpy as np import subprocess as sp @@ -12,7 +12,7 @@ from dpgen import dlog from dpgen import ROOT_PATH from dpgen.remote.decide_machine import convert_mdata -from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher +from dpgen.dispatcher.Dispatcher import make_submission_compat #-----PMG--------- from pymatgen.io.vasp import Poscar from pymatgen.core import Structure, Element @@ -565,15 +565,16 @@ def run_vasp_relax(jdata, mdata): run_tasks = [ii.replace(work_dir+"/", "") for ii in relax_run_tasks] #dlog.info(run_tasks) - dispatcher = make_dispatcher(mdata['fp_machine'], mdata['fp_resources'], work_dir, run_tasks, fp_group_size) - 
dispatcher.run_jobs(fp_resources,
+    make_submission_compat(mdata['fp_machine'],
+                           fp_resources,
                            [fp_command],
                            work_dir,
                            run_tasks,
                            fp_group_size,
                            forward_common_files,
                            forward_files,
-                           backward_files)
+                           backward_files,
+                           api_version=mdata.get("api_version", "0.9"))
 
 def gen_init_surf(args):
     try:
diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py
index 29bea5669..abbe493b8 100644
--- a/dpgen/dispatcher/Dispatcher.py
+++ b/dpgen/dispatcher/Dispatcher.py
@@ -1,5 +1,6 @@
 from distutils.version import LooseVersion
 import os,sys,time,random,json,glob
+import warnings
 from typing import List
 from dpdispatcher import Task, Submission, Resources, Machine
 from dpgen.dispatcher.LocalContext import LocalSession
@@ -406,3 +407,81 @@ def mdata_arginfo() -> List[Argument]:
     return [
         command_arginfo, machine_arginfo, resources_arginfo,
     ]
+
+
+def make_submission_compat(
+        machine: dict,
+        resources: dict,
+        commands: List[str],
+        work_path: str,
+        run_tasks: List[str],
+        group_size: int,
+        forward_common_files: List[str],
+        forward_files: List[str],
+        backward_files: List[str],
+        outlog: str="log",
+        errlog: str="err",
+        api_version: str="0.9",
+    ) -> None:
+    """Make a submission that is compatible with both dispatcher API v0 and v1.
+
+    If `api_version` is less than 1.0, use `make_dispatcher`. If
+    `api_version` is 1.0 or later, use `make_submission`.
+
+    Parameters
+    ----------
+    machine : dict
+        machine dict
+    resources : dict
+        resource dict
+    commands : list[str]
+        list of commands
+    work_path : str
+        working directory
+    run_tasks : list[str]
+        list of paths to running tasks
+    group_size : int
+        group size
+    forward_common_files : list[str]
+        forwarded common files shared by all tasks
+    forward_files : list[str]
+        forwarded files for each task
+    backward_files : list[str]
+        backward files for each task
+    outlog : str, default=log
+        path to log from stdout
+    errlog : str, default=err
+        path to log from stderr
+    api_version : str, default=0.9
+        API version. 1.0 is recommended
+    """
+    if LooseVersion(api_version) < LooseVersion('1.0'):
+        warnings.warn(f"the dpdispatcher will be updated to a new version. "
+            f"And the interface may be changed. 
Please check the documents for more details")
+        dispatcher = make_dispatcher(machine, resources, work_path, run_tasks, group_size)
+        dispatcher.run_jobs(resources,
+            commands,
+            work_path,
+            run_tasks,
+            group_size,
+            forward_common_files,
+            forward_files,
+            backward_files,
+            outlog=outlog,
+            errlog=errlog)
+
+    elif LooseVersion(api_version) >= LooseVersion('1.0'):
+        submission = make_submission(
+            machine,
+            resources,
+            commands=commands,
+            work_path=work_path,
+            run_tasks=run_tasks,
+            group_size=group_size,
+            forward_common_files=forward_common_files,
+            forward_files=forward_files,
+            backward_files=backward_files,
+            outlog=outlog,
+            errlog=errlog)
+        submission.run_submission()
+

From 7a5dc0c7fec46a2dcdc49cd758a42c88cfd53462 Mon Sep 17 00:00:00 2001
From: HuangJiameng <105633685+HuangJiameng@users.noreply.github.com>
Date: Sun, 12 Jun 2022 12:43:44 +0800
Subject: [PATCH 04/26] update: add a new option of absolute volume in
 /dpgen/dpgen/auto_test/EOS.py (#741)

* update: add a new option of absolute volume in ./dpgen/auto_test/EOS.py
* update: add doc in /dpgen/doc/toymodels/
* update: change the description for eos, change the doc in /dpgen/doc/toymodels/
* update: change the notice of absolute volume from print into dlog.info
---
 doc/toymodels/JiamengHuang_pr.md |  9 +++++++++
 dpgen/auto_test/EOS.py           | 15 +++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 doc/toymodels/JiamengHuang_pr.md

diff --git a/doc/toymodels/JiamengHuang_pr.md b/doc/toymodels/JiamengHuang_pr.md
new file mode 100644
index 000000000..3b1210228
--- /dev/null
+++ b/doc/toymodels/JiamengHuang_pr.md
@@ -0,0 +1,9 @@
+A new parameter "vol_abs" is added. If you want to use absolute volumes to get the EOS, you can add
+
+    "vol_abs": true,
+
+in the "eos" part of property.json.
+If it is not set, it defaults to "False".
+When absolute volumes are used, a note like the following appears in the last line of the output during the "make" process:
+
+treat vol_start and vol_end as absolute volume
diff --git a/dpgen/auto_test/EOS.py b/dpgen/auto_test/EOS.py
index 4f332198b..f824dba8e 100644
--- a/dpgen/auto_test/EOS.py
+++ b/dpgen/auto_test/EOS.py
@@ -24,6 +24,8 @@ def __init__(self,
         self.vol_start = parameter['vol_start']
         self.vol_end = parameter['vol_end']
         self.vol_step = parameter['vol_step']
+        parameter['vol_abs'] = parameter.get('vol_abs', False)
+        self.vol_abs = parameter['vol_abs']
         parameter['cal_type'] = parameter.get('cal_type', 'relaxation')
         self.cal_type = parameter['cal_type']
         default_cal_setting = {"relax_pos": True,
@@ -117,6 +119,10 @@ def make_confs(self,
         else:
             print('gen eos from ' + str(self.vol_start) + ' to ' + str(self.vol_end) + ' by every ' + str(self.vol_step))
+            if self.vol_abs :
+                dlog.info('treat vol_start and vol_end as absolute volume')
+            else :
+                dlog.info('treat vol_start and vol_end as relative volume')
         equi_contcar = os.path.join(path_to_equi, 'CONTCAR')
         if not os.path.exists(equi_contcar):
             raise RuntimeError("please do relaxation first")
@@ -138,8 +144,13 @@ def make_confs(self,
             task_list.append(output_task)
             os.symlink(os.path.relpath(equi_contcar), 'POSCAR.orig')
             # scale = (vol / vol_to_poscar) ** (1. / 3.)
-            scale = vol ** (1. / 3.)
-            eos_params = {'volume': vol * vol_to_poscar, 'scale': scale}
+
+            if self.vol_abs :
+                scale = (vol / vol_to_poscar) ** (1. / 3.)
+                eos_params = {'volume': vol, 'scale': scale}
+            else :
+                scale = vol ** (1. / 3.)
+                eos_params = {'volume': vol * vol_to_poscar, 'scale': scale}
             dumpfn(eos_params, 'eos.json', indent=4)
             self.parameter['scale2equi'].append(scale)  # 06/22
             vasp.poscar_scale('POSCAR.orig', 'POSCAR', scale)

From 800fb519681b9dc00b07627ebde4f0ce3dfa20b0 Mon Sep 17 00:00:00 2001
From: Liu Renxi <75369672+Liu-RX@users.noreply.github.com>
Date: Tue, 14 Jun 2022 14:05:50 +0800
Subject: [PATCH 05/26] fix a bug in make_abacus_scf_input (#754)

Co-authored-by: LiuRenxi
---
 README.md                         | 2 +-
 dpgen/generator/lib/abacus_scf.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index fb280e226..f26fb51df 100644
--- a/README.md
+++ b/README.md
@@ -570,7 +570,7 @@ The bold notation of key (such as **type_map**) means that it's a necessary key
 | **user_fp_params** | Dict | |Parameters for the cp2k calculation. Find details at manual.cp2k.org. Only the kind section must be set before use. We assume that you have basic knowledge of cp2k input.
 | **external_input_path** | String | | Conflicts with key `user_fp_params`; use the template input provided by the user. Some rules should be followed; read the following text in detail.
 | *fp_style == ABACUS*
-| **user_fp_params** | Dict | |Parameters for ABACUS INPUT. find detail [Here](https://github.com/deepmodeling/abacus-develop/blob/develop/docs/input-main.md#out-descriptor). If `deepks_model` is set, the model file should be in the pseudopotential directory.
+| **user_fp_params** | Dict | |Parameters for ABACUS INPUT. Find details [here](https://github.com/deepmodeling/abacus-develop/blob/develop/docs/input-main.md#out-descriptor). If `deepks_model` is set, the model file should be in the pseudopotential directory. You can also set the `KPT` file by adding `k_points`, which corresponds to a list of six integers, to this dictionary.
 | **fp_orb_files** | List | |List of atomic orbital files. The files should be in the pseudopotential directory.
 | **fp_dpks_descriptor** | String | |DeePKS descriptor file name. The file should be in the pseudopotential directory.

diff --git a/dpgen/generator/lib/abacus_scf.py b/dpgen/generator/lib/abacus_scf.py
index 256eb1d9d..1a9882979 100644
--- a/dpgen/generator/lib/abacus_scf.py
+++ b/dpgen/generator/lib/abacus_scf.py
@@ -83,7 +83,7 @@ def make_abacus_scf_input(fp_params):
             ret += "deepks_scf %d\n" % fp_params["deepks_scf"]
         elif key == "deepks_model":
             ret += "deepks_model %s\n" % fp_params["deepks_model"]
-        else:
+        elif key != "k_points":  # the k_points key is used to generate the KPT file
ret += "%s %s\n" % (key, str(fp_params[key])) return ret From aca0d3229408b3082693149bd888b9527db85684 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 14 Jun 2022 02:17:46 -0400 Subject: [PATCH 06/26] init_reaction: fix compatibility with new dpdispatcher (#755) fix compatibility as the key was changed in the dpdispatcher --- dpgen/data/reaction.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index 0abfeb965..5e900f9de 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -107,6 +107,9 @@ def link_trj(jdata): def run_build_dataset(jdata, mdata, log_file="build_log"): work_path = build_path + # compatible with new dpdispatcher and old dpgen.dispatcher + build_ntasks = mdata["build_resources"].get("cpu_per_node", mdata["build_resources"]["task_per_node"]) + fp_ntasks = mdata["fp_resources"].get("cpu_per_node", mdata["fp_resources"]["task_per_node"]) build_command = "{cmd} -n {dataset_name} -a {type_map} -d {lammpstrj} -c {cutoff} -s {dataset_size} -k \"{qmkeywords}\" --nprocjob {nprocjob} --nproc {nproc}".format( cmd=mdata["build_command"], type_map=" ".join(jdata["type_map"]), @@ -114,8 +117,8 @@ def run_build_dataset(jdata, mdata, log_file="build_log"): cutoff=jdata["cutoff"], dataset_size=jdata["dataset_size"], qmkeywords=jdata["qmkeywords"], - nprocjob=mdata["fp_resources"]["task_per_node"], - nproc=mdata["build_resources"]["task_per_node"], + nprocjob=fp_ntasks, + nproc=build_ntasks, dataset_name=dataset_name ) run_tasks = glob.glob(os.path.join(work_path, 'task.*')) From bace57e624dbdc0e40b3d5e65cc5967db16af234 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 19 Jun 2022 12:09:51 -0400 Subject: [PATCH 07/26] generate machine parameter docs for simplify and init (#751) * generate machine parameter page for simplify and init * switching to new dargs directive --- doc/conf.py | 1 + doc/index.rst | 5 +++++ doc/init/init-bulk-mdata.rst | 6 ++++++ doc/init/init-reaction-mdata.rst | 6 ++++++ doc/init/init-surf-mdata.rst | 6 ++++++ doc/simplify/simplify-mdata.rst | 6 ++++++ dpgen/arginfo.py | 35 +++++++++++++++++++++++++++++++ dpgen/data/arginfo.py | 36 ++++++++++++++++++++++++++++++++ dpgen/generator/arginfo.py | 16 ++------------ dpgen/simplify/arginfo.py | 13 ++++++++++++ 10 files changed, 116 insertions(+), 14 deletions(-) create mode 100644 doc/init/init-bulk-mdata.rst create mode 100644 doc/init/init-reaction-mdata.rst create mode 100644 doc/init/init-surf-mdata.rst create mode 100644 doc/simplify/simplify-mdata.rst create mode 100644 dpgen/arginfo.py create mode 100644 dpgen/data/arginfo.py create mode 100644 dpgen/simplify/arginfo.py diff --git a/doc/conf.py b/doc/conf.py index 9d5ecc006..99dce21b5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -40,6 +40,7 @@ extensions = [ 'deepmodeling_sphinx', + 'dargs.sphinx', "sphinx_rtd_theme", 'myst_parser', 'sphinx.ext.autosummary', diff --git a/doc/index.rst b/doc/index.rst index 341ce5d79..6eea4d95b 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -32,6 +32,9 @@ DPGEN's documentation :maxdepth: 2 :caption: Init + init/init-bulk-mdata + init/init-surf-mdata + init/init-reaction-mdata .. _autotest:: @@ -46,6 +49,8 @@ DPGEN's documentation :maxdepth: 2 :caption: Simplify + simplify/simplify-mdata + .. 
_tutorial: diff --git a/doc/init/init-bulk-mdata.rst b/doc/init/init-bulk-mdata.rst new file mode 100644 index 000000000..b3098e906 --- /dev/null +++ b/doc/init/init-bulk-mdata.rst @@ -0,0 +1,6 @@ +dpgen init_bulk machine parameters +================================== + +.. dargs:: + :module: dpgen.data.arginfo + :func: init_bulk_mdata_arginfo diff --git a/doc/init/init-reaction-mdata.rst b/doc/init/init-reaction-mdata.rst new file mode 100644 index 000000000..2fe35a0d8 --- /dev/null +++ b/doc/init/init-reaction-mdata.rst @@ -0,0 +1,6 @@ +dpgen init_reaction machine parameters +====================================== + +.. dargs:: + :module: dpgen.data.arginfo + :func: init_reaction_mdata_arginfo diff --git a/doc/init/init-surf-mdata.rst b/doc/init/init-surf-mdata.rst new file mode 100644 index 000000000..35e8e322f --- /dev/null +++ b/doc/init/init-surf-mdata.rst @@ -0,0 +1,6 @@ +dpgen init_surf machine parameters +================================== + +.. dargs:: + :module: dpgen.data.arginfo + :func: init_surf_mdata_arginfo diff --git a/doc/simplify/simplify-mdata.rst b/doc/simplify/simplify-mdata.rst new file mode 100644 index 000000000..995fc90f8 --- /dev/null +++ b/doc/simplify/simplify-mdata.rst @@ -0,0 +1,6 @@ +dpgen simplify machine parameters +================================= + +.. dargs:: + :module: dpgen.simplify.arginfo + :func: simplify_mdata_arginfo diff --git a/dpgen/arginfo.py b/dpgen/arginfo.py new file mode 100644 index 000000000..3f657942a --- /dev/null +++ b/dpgen/arginfo.py @@ -0,0 +1,35 @@ +from typing import Tuple + +from dargs import Argument + +from dpgen.dispatcher.Dispatcher import mdata_arginfo + + +def general_mdata_arginfo(name: str, tasks: Tuple[str]) -> Argument: + """Generate arginfo for general mdata. + + Parameters + ---------- + name : str + mdata name + tasks : tuple[str] + tuple of task keys, e.g. ("train", "model_devi", "fp") + + Returns + ------- + Argument + arginfo + """ + + doc_api_version = "Please set to 1.0" + doc_run_mdata = "machine.json file" + arg_api_version = Argument("api_version", str, optional=False, doc=doc_api_version) + + sub_fields = [arg_api_version] + doc_mdata = "Parameters of command, machine, and resources for %s" + for task in tasks: + sub_fields.append(Argument( + task, dict, optional=False, sub_fields=mdata_arginfo(), + doc=doc_mdata % task, + )) + return Argument(name, dict, sub_fields=sub_fields, doc=doc_run_mdata) diff --git a/dpgen/data/arginfo.py b/dpgen/data/arginfo.py new file mode 100644 index 000000000..d5814c036 --- /dev/null +++ b/dpgen/data/arginfo.py @@ -0,0 +1,36 @@ +from dargs import Argument + +from dpgen.arginfo import general_mdata_arginfo + + +def init_bulk_mdata_arginfo() -> Argument: + """Generate arginfo for dpgen init_bulk mdata. + + Returns + ------- + Argument + arginfo + """ + return general_mdata_arginfo("init_bulk_mdata", ("fp",)) + + +def init_surf_mdata_arginfo() -> Argument: + """Generate arginfo for dpgen init_surf mdata. + + Returns + ------- + Argument + arginfo + """ + return general_mdata_arginfo("init_surf_mdata", ("fp",)) + + +def init_reaction_mdata_arginfo() -> Argument: + """Generate arginfo for dpgen init_reaction mdata. 
+ + Returns + ------- + Argument + arginfo + """ + return general_mdata_arginfo("init_reaction_mdata", ("reaxff", "build", "fp")) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index f8815862d..cb2fb887b 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -1,6 +1,6 @@ from dargs import Argument -from dpgen.dispatcher.Dispatcher import mdata_arginfo +from dpgen.arginfo import general_mdata_arginfo def run_mdata_arginfo() -> Argument: """Generate arginfo for dpgen run mdata. @@ -10,16 +10,4 @@ def run_mdata_arginfo() -> Argument: Argument arginfo """ - - doc_api_version = "Please set to 1.0" - doc_run_mdata = "machine.json file" - arg_api_version = Argument("api_version", str, optional=False, doc=doc_api_version) - - sub_fields = [arg_api_version] - doc_mdata = "Parameters of command, machine, and resources for %s" - for task in ("train", "model_devi", "fp"): - sub_fields.append(Argument( - task, dict, optional=False, sub_fields=mdata_arginfo(), - doc=doc_mdata % task, - )) - return Argument("run_mdata", dict, sub_fields=sub_fields, doc=doc_run_mdata) + return general_mdata_arginfo("run_mdata", ("train", "model_devi", "fp")) diff --git a/dpgen/simplify/arginfo.py b/dpgen/simplify/arginfo.py new file mode 100644 index 000000000..0fbfe606e --- /dev/null +++ b/dpgen/simplify/arginfo.py @@ -0,0 +1,13 @@ +from dargs import Argument + +from dpgen.arginfo import general_mdata_arginfo + +def simplify_mdata_arginfo() -> Argument: + """Generate arginfo for dpgen simplify mdata. + + Returns + ------- + Argument + arginfo + """ + return general_mdata_arginfo("simplify_mdata", ("train", "model_devi", "fp")) From 9083b1d795a8c2dcfd6c3c002705b00b96506dea Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 20 Jun 2022 23:17:34 -0400 Subject: [PATCH 08/26] add auto cli docs (#759) * add auto cli docs * fix typo * fix package name... * forgot to return parser * add the blank line --- doc/conf.py | 1 + doc/index.rst | 2 ++ doc/overview/cli.rst | 7 +++++++ doc/requirements.txt | 1 + dpgen/main.py | 16 +++++++++++++--- 5 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 doc/overview/cli.rst diff --git a/doc/conf.py b/doc/conf.py index 99dce21b5..97b4b206b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -45,6 +45,7 @@ 'myst_parser', 'sphinx.ext.autosummary', 'sphinx.ext.viewcode', + 'sphinxarg.ext', ] diff --git a/doc/index.rst b/doc/index.rst index 6eea4d95b..eaa229813 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -7,6 +7,8 @@ DPGEN's documentation .. toctree:: :maxdepth: 2 :caption: Overview + + overview/cli .. _installation:: diff --git a/doc/overview/cli.rst b/doc/overview/cli.rst new file mode 100644 index 000000000..e57f1b064 --- /dev/null +++ b/doc/overview/cli.rst @@ -0,0 +1,7 @@ +Command line interface +====================== + +.. argparse:: + :module: dpgen.main + :func: main_parser + :prog: dpgen diff --git a/doc/requirements.txt b/doc/requirements.txt index 33ad28e39..0ae5c76f1 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -2,6 +2,7 @@ sphinx>=4.0.2 recommonmark sphinx_rtd_theme sphinx_markdown_tables +sphinx-argparse myst-parser deepmodeling_sphinx . diff --git a/dpgen/main.py b/dpgen/main.py index 6dcdc4ccd..c93c41ef4 100644 --- a/dpgen/main.py +++ b/dpgen/main.py @@ -30,9 +30,14 @@ __email__ = "" -def main(): - info() - print("Description\n------------") +def main_parser() -> argparse.ArgumentParser: + """Returns parser for `dpgen` command. 
+ + Returns + ------- + argparse.ArgumentParser + parser for `dpgen` command + """ parser = argparse.ArgumentParser(description=""" dpgen is a convenient script that uses DeepGenerator to prepare initial data, drive DeepMDkit and analyze results. This script works based on @@ -156,8 +161,13 @@ def main(): help="parameter file, json format") parser_db.set_defaults(func=db_run) + return parser +def main(): + info() + print("Description\n------------") + parser = main_parser() try: import argcomplete argcomplete.autocomplete(parser) From f5c317228d1673f9428edbd5453c7b3a41aaf330 Mon Sep 17 00:00:00 2001 From: HuangJiameng <105633685+HuangJiameng@users.noreply.github.com> Date: Wed, 29 Jun 2022 10:01:41 +0800 Subject: [PATCH 09/26] correct the wrong spelling of 'failure' (#764) --- dpgen/dispatcher/DispatcherList.py | 6 +++--- examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dpgen/dispatcher/DispatcherList.py b/dpgen/dispatcher/DispatcherList.py index 085ae5d9a..22b77fd50 100644 --- a/dpgen/dispatcher/DispatcherList.py +++ b/dpgen/dispatcher/DispatcherList.py @@ -45,7 +45,7 @@ def run_jobs(self, mark_failure = False, outlog = 'log', errlog = 'err'): - ratio_failure = self.mdata_resources.get("ratio_failue", 0) + ratio_failure = self.mdata_resources.get("ratio_failure", 0) while True: if self.check_all_dispatchers_finished(ratio_failure): self.clean() @@ -188,7 +188,7 @@ def make_dispatcher(self, ii): # Base - def check_dispatcher_status(self, ii, allow_failue=False): + def check_dispatcher_status(self, ii, allow_failure=False): '''catch running dispatcher exception if no exception occured, check finished''' if self.dispatcher_list[ii]["dispatcher_status"] == "running": @@ -198,7 +198,7 @@ def check_dispatcher_status(self, ii, allow_failue=False): clean = self.mdata_resources.get("clean", False) try: # avoid raising ssh exception in download proceess - finished = self.dispatcher_list[ii]["dispatcher"].all_finished(self.dispatcher_list[ii]["entity"].job_handler, allow_failue, clean) + finished = self.dispatcher_list[ii]["dispatcher"].all_finished(self.dispatcher_list[ii]["entity"].job_handler, allow_failure, clean) if finished: self.dispatcher_list[ii]["dispatcher_status"] = "finished" except Exception: diff --git a/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json b/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json index 3de59661f..e2db8d254 100644 --- a/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json +++ b/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json @@ -64,7 +64,7 @@ }, "resources": { "allow_failure": true, - "ratio_failue": 0.05, + "ratio_failure": 0.05, "task_per_node": 16, "with_mpi": true, "_comment" : "Load the intel compiler.", From 66d856cd9862e9b7becef596c4d8a8cd504cee06 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 1 Jul 2022 01:22:16 -0400 Subject: [PATCH 10/26] upgrade machine examples to new dpdispatcher (#762) --- .../DeePMD-kit-1.0/machine-local-4GPU.json | 165 +++++++------ .../machine/DeePMD-kit-1.x/machine-local.json | 103 ++++---- .../machine-lsf-slurm-cp2k.json | 178 +++++++------- .../DeePMD-kit-1.x/machine-pbs-gaussian.json | 163 +++++++------ .../DeePMD-kit-1.x/machine-slurm-qe.json | 223 +++++++++--------- 5 files changed, 442 insertions(+), 390 deletions(-) diff --git a/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json b/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json index e2db8d254..e0e6bfca0 100644 --- 
a/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json +++ b/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json @@ -1,79 +1,90 @@ { - "_comment" : "This is an example of DP-GEN on Local device running with 4 GPUs", - "_comment" : "Last updated on 2021.5.9 for DP-GEN 0.9.2 by Ke XU", - "train": [ - { - "_comment" : "Specify the installed path of DeePMD-kit", - "_comment" : "The version of DeePMD-kit should be 1.*", - "command": "/home/user/anaconda3/bin/dp", - "_comment" : "Specify machine settings", - "machine": { - "_comment" : "Supported batches include slurm, pbs, shell, lsf.", - "batch": "shell", - "work_path": "/tmp/dpwork", - "_comment": "that's all" - }, - "resources":{ - "_comment" : "The number of nodes.", - "numb_node": 1, - "_comment" : "If you choose to run with multiple GPUs simultaneously, just ignore numb_gpu.", - "numb_gpu": 0, - "_comment" : "The number of CPUs.", - "task_per_node": 4, - "_comment" : "The number of GPUs that can be used for each task.", - "manual_cuda_devices": 4, - "_comment" : "The number of tasks that can be run in each GPU.", - "manual_cuda_multiplicity":1, - "_comment" : "Allow the multi-GPU task running.", - "cuda_multi_task": true, - "module_list": [], - "_comment" : "Environment to be activated. This will generate source xxx/psxevars.sh in scripts. ", - "source_list": ["/opt/intel/parallel_studio_xe_2020/psxevars.sh"] - }, - "_comment" : "DP-GEN will put 4 tasks together in one submitting script.", - "group_size": 4 - } - ], - - "model_devi": [ - { - "machine": { - "batch": "shell", - "work_path": "/tmp/dpwork" - }, - "resources": { - "numb_node": 1, - "numb_gpu": 0, - "task_per_node": 4, - "manual_cuda_devices": 4, - "manual_cuda_multiplicity":1, - "cuda_multi_task": true, - "source_list": [], - "module_list": [] - }, - "command": "/home/user/Soft/Deepmd/lammps-stable_29Oct2020/src/lmp_mpi", - "group_size": 4 - } - ], - - "fp": [ - { - "machine": { - "batch": "shell", - "work_path": "/tmp/dpwork" - }, - "resources": { - "allow_failure": true, - "ratio_failure": 0.05, - "task_per_node": 16, - "with_mpi": true, - "_comment" : "Load the intel compiler.", - "source_list": ["/opt/intel/parallel_studio_xe_2020/psxevars.sh"], - "envs": {"PATH" : "/home/user/Soft/VASP/vasp.5.4.4-allbak/bin:$PATH"}, - "_comment" : "This will generate export PATH=/home/user/Soft/VASP/vasp.5.4.4-allbak/bin:$PATH in scripts;" - }, - "command": "vasp_std", - "group_size": 1 - } - ] + "_comment" : "This is an example of DP-GEN on Local device running with 4 GPUs", + "_comment": "Last updated on 2021.5.9 for DP-GEN 0.9.2 by Ke XU", + "train": { + "_comment" : "Specify the installed path of DeePMD-kit", + "command": "/home/user/anaconda3/bin/dp", + "_comment" : "Specify machine settings", + "machine": { + "_comment": "Supported batches include slurm, pbs, shell, lsf.", + "batch_type": "shell", + "context_type": "local", + "remote_root": "/tmp/dpwork", + "local_root": "./" + }, + "resources": { + "module_list": [], + "_comment": "Environment to be activated. This will generate source xxx/psxevars.sh in scripts. 
", + "source_list": [ + "/opt/intel/parallel_studio_xe_2020/psxevars.sh" + ], + "batch_type": "shell", + "_comment": "DP-GEN will put 4 tasks together in one submitting script.", + "group_size": 4, + "_comment" : "The number of nodes.", + "number_node": 1, + "_comment" : "The number of CPUs.", + "cpu_per_node": 4, + "_comment" : "If you choose to run with multiple GPUs simultaneously, just ignore numb_gpu.", + "gpu_per_node": 0, + "kwargs": {}, + "strategy": { + "_comment" : "Allow the multi-GPU task running.", + "if_cuda_multi_devices": true + }, + "para_deg": 4, + "queue_name": "" + } + }, + "model_devi": { + "machine": { + "batch_type": "shell", + "context_type": "local", + "remote_root": "/tmp/dpwork", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [], + "batch_type": "shell", + "group_size": 4, + "number_node": 1, + "cpu_per_node": 4, + "gpu_per_node": 0, + "kwargs": {}, + "strategy": { + "if_cuda_multi_devices": true + }, + "para_deg": 4, + "queue_name": "" + }, + "command": "/home/user/Soft/Deepmd/lammps-stable_29Oct2020/src/lmp_mpi" + }, + "fp": { + "machine": { + "batch_type": "shell", + "context_type": "local", + "remote_root": "/tmp/dpwork", + "local_root": "./" + }, + "resources": { + "_comment" : "Load the intel compiler.", + "source_list": [ + "/opt/intel/parallel_studio_xe_2020/psxevars.sh" + ], + "_comment": "This will generate export PATH=/home/user/Soft/VASP/vasp.5.4.4-allbak/bin:$PATH in scripts;", + "envs": { + "PATH": "/home/user/Soft/VASP/vasp.5.4.4-allbak/bin:$PATH" + }, + "batch_type": "shell", + "group_size": 1, + "cpu_per_node": 16, + "kwargs": {}, + "queue_name": "", + "number_node": 1, + "gpu_per_node": 1 + }, + "command": "mpirun -n 16 vasp_std || :" + }, + "api_version": "1.0" } diff --git a/examples/machine/DeePMD-kit-1.x/machine-local.json b/examples/machine/DeePMD-kit-1.x/machine-local.json index a266f712b..c8134d750 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-local.json +++ b/examples/machine/DeePMD-kit-1.x/machine-local.json @@ -1,42 +1,65 @@ { - "_comment": "training on localhost ", - "_comment" : "This is for DeePMD-kit 1.*", - "train_command" : "/home/wanghan/local/deepmd/1.*/dp", - "train_machine": { - "batch": "shell", - "work_path" : "/home/wanghan/tmp/subs/" - }, - "train_resources": { - "envs": { - } - }, - - - "_comment": "model_devi on localhost ", - "model_devi_command": "/home/wanghan/local/bin/lmp_mpi_010", - "model_devi_group_size": 5, - "model_devi_machine": { - "batch": "shell", - "_comment" : "If lazy_local is true, calculations are done directly in current folders.", - "lazy_local" : true - }, - "model_devi_resources": { - }, - - "_comment": "fp on localhost ", - "fp_command": "/home/wanghan/local/bin/vasp_std", - "fp_group_size": 2, - "fp_machine": { - "batch": "shell", - "work_path" : "/home/wanghan/tmp/subs/", - "_comment" : "that's all" - }, - "fp_resources": { - "module_list": ["mpi"], - "task_per_node":4, - "with_mpi": true, - "_comment": "that's all" + "api_version": "1.0", + "train": { + "_comment": "training on localhost", + "machine": { + "batch_type": "shell", + "context_type": "local", + "remote_root": "/home/wanghan/tmp/subs/", + "local_root": "./" + }, + "resources": { + "envs": {}, + "batch_type": "shell", + "group_size": 1, + "kwargs": {}, + "queue_name": "", + "number_node": 1, + "cpu_per_node": 1, + "gpu_per_node": 1 + }, + "command": "/home/wanghan/local/deepmd/1.*/dp" }, - - "_comment": " that's all " -} + "model_devi": { + "_comment": "model devi on localhost", + "machine": 
{ + "_comment": "If lazy_local, calculations are done directly in current folders.", + "batch_type": "shell", + "context_type": "lazylocal", + "local_root": "./" + }, + "resources": { + "batch_type": "shell", + "group_size": 5, + "kwargs": {}, + "queue_name": "", + "number_node": 1, + "cpu_per_node": 1, + "gpu_per_node": 1 + }, + "command": "/home/wanghan/local/bin/lmp_mpi_010" + }, + "fp": { + "_comment": "fp on localhost", + "machine": { + "batch_type": "shell", + "context_type": "local", + "remote_root": "/home/wanghan/tmp/subs/", + "local_root": "./" + }, + "resources": { + "module_list": [ + "mpi" + ], + "_comment": "that's all", + "batch_type": "shell", + "group_size": 2, + "cpu_per_node": 4, + "kwargs": {}, + "queue_name": "", + "number_node": 1, + "gpu_per_node": 1 + }, + "command": "mpirun -n 4 /home/wanghan/local/bin/vasp_std" + } +} \ No newline at end of file diff --git a/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json b/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json index 4fb5845ee..348609c1e 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json +++ b/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json @@ -1,88 +1,98 @@ { - "train": - { - "machine": { - "machine_type": "slurm", - "hostname": "210.34.15.205", - "port": 22, - "username": "ybzhuang", - "work_path": "/home/ybzhuang/workdir" - }, - "resources": { - "numb_gpu": 1, - "numb_node": 1, - "task_per_node": 1, - "partition": "gpu", - "job_name": "train", - "qos":"emergency", - "exclude_list": [], - "source_list": [ - ], - "module_list": [ - "deepmd/1.2" - ], - "time_limit": "96:0:0", - "submit_wait_time": 60 - }, - "python_path": "/share/apps/deepmd/compress/bin/python3.8" + "train": { + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "210.34.15.205", + "port": 22, + "username": "ybzhuang" + }, + "remote_root": "/home/ybzhuang/workdir", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [ + "deepmd/1.2" + ], + "batch_type": "slurm", + "number_node": 1, + "cpu_per_node": 1, + "gpu_per_node": 1, + "queue_name": "emergency", + "custom_flags": [ + "#SBATCH -t 96:0:0" + ], + "kwargs": {}, + "wait_time": 60, + "group_size": 1 + }, + "command": "/share/apps/deepmd/compress/bin/python3.8-m deepmd" }, - "model_devi": - { - "machine": { - "machine_type": "slurm", - "hostname": "210.34.15.205", - "port": 22, - "username": "ybzhuang", - "work_path": "/home/ybzhuang/workdir" - }, - "resources": { - "numb_gpu": 1, - "numb_node": 1, - "task_per_node": 1, - "partition": "gpu", - "job_name": "md", - "qos":"emergency", - "exclude_list": [], - "source_list": [ - ], - "module_list": [ - "deepmd/1.2" - ], - "time_limit": "96:0:0", - "submit_wait_time": 60 - }, - "command": "lmp_mpi", - "group_size": 5 + "model_devi": { + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "210.34.15.205", + "port": 22, + "username": "ybzhuang" + }, + "remote_root": "/home/ybzhuang/workdir", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [ + "deepmd/1.2" + ], + "batch_type": "slurm", + "group_size": 5, + "number_node": 1, + "cpu_per_node": 1, + "gpu_per_node": 1, + "queue_name": "emergency", + "custom_flags": [ + "#SBATCH -t 96:0:0" + ], + "kwargs": {}, + "wait_time": 60 + }, + "command": "lmp_mpi" }, - "fp": - { - "machine": { - "machine_type": "lsf", - "hostname": "localhost", - "port": 6666, - "username": "ybzhuang", - "work_path": 
"/data/ybzhuang/methane-dpgen/dpgen-tutorial-2020-08-23/dpgen-tutorial-mathane/workpath" - }, - "resources": { - "cvasp": false, - "task_per_node": 32, - "numb_node": 1, - "node_cpu": 32, - "exclude_list": [], - "with_mpi": true, - "source_list": [ - ], - "module_list": [ - "intel/17.5.239", - "mpi/intel/2017.5.239", - "gcc/5.5.0", - "cp2k/7.1" + "fp": { + "machine": { + "batch_type": "lsf", + "context_type": "ssh", + "remote_profile": { + "hostname": "localhost", + "port": 6666, + "username": "ybzhuang" + }, + "remote_root": "/data/ybzhuang/methane-dpgen/dpgen-tutorial-2020-08-23/dpgen-tutorial-mathane/workpath", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [ + "intel/17.5.239", + "mpi/intel/2017.5.239", + "gcc/5.5.0", + "cp2k/7.1" + ], + "batch_type": "lsf", + "group_size": 50, + "number_node": 1, + "cpu_per_node": 32, + "queue_name": "53-medium", + "custom_flags": [ + "#BSUB -W 12:00:00" ], - "time_limit": "12:00:00", - "partition": "53-medium", - "_comment": "that's Bel" - }, - "command": "cp2k.popt -i input.inp", - "group_size": 50 - } -} + "kwargs": {}, + "gpu_per_node": 1 + }, + "command": "mpirun -n 32 cp2k.popt -i input.inp" + }, + "api_version": "1.0" +} \ No newline at end of file diff --git a/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json b/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json index 6893471c5..daa743dcc 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json +++ b/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json @@ -1,79 +1,88 @@ { - "_comment": "training on localhost ", - "train_command": "/gpfs/home/tzhu/anaconda3/envs/python3.6/bin/dp", - "train_machine": { - "machine_type": "lsf", - "hostname" : "xxx.xxx.xxx.xxx", - "port" : 22, - "username": "tzhu", - "work_path" : "/gpfs/home/tzhu/jzzeng/dpgen_workdir", - "_comment" : "that's all" - }, - "train_resources": { - "source_list": [ "activate deepmd" ], - "envs": { - "KMP_BLOCKTIME": 0, - "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" - }, - "numb_gpu": 1, - "numb_node": 1, - "node_cpu": 0, - "partition": "newgpu", - "job_name": "dpgen_jzzeng", - "with_mpi": false, - "time_limit": false, - "_comment": "that's all" - }, - - - "_comment": "model_devi on localhost ", - "model_devi_command": "/gpfs/home/tzhu/lammps-stable_5Jun2019/src/lmp_intel_cpu_intelmpi -pk intel 0 omp 2", - "model_devi_group_size": 1, - "model_devi_machine": { - "machine_type": "lsf", - "hostname" : "xxx.xxx.xxx.xxx", - "port" : 22, - "username": "tzhu", - "work_path" : "/gpfs/home/tzhu/jzzeng/dpgen_workdir", - "_comment" : "that's all" - }, - "model_devi_resources": { - "envs": { - "KMP_BLOCKTIME": 0 - }, - "source_list": [ "activate deepmd" ], - "numb_gpu": 1, - "numb_node": 1, - "node_cpu": 0, - "time_limit": false, - "partition": "newgpu", - "job_name": "dpgen_jzzeng", - "with_mpi": true, - "task_per_node": 1, - "_comment": "that's all" - }, - - "_comment": "fp on lsf //localhost ", - "fp_command": "/public/home/tzhu/g16/g16 < input", - "fp_group_size": 1, - "fp_machine": { - "machine_type": "pbs", - "hostname" : "xxx.xxx.xxx.xxx", - "port" : 2323, - "username": "tzhu", - "work_path" : "/public/home/tzhu/jzzeng/dpgen_workdir", - "_comment" : "that's all" - }, - "fp_resources": { - "node_cpu":28, - "numb_node": 1, - "job_name": "dpgen_jzzeng", - "task_per_node": 28, - "with_mpi": false, - "time_limit": "10:00:00", - "allow_failure": true, - "partition": "small", - "_comment": "that's all" + "api_version": "1.0", + "train": { + "machine": { + "batch_type": 
"lsf", + "context_type": "ssh", + "remote_profile": { + "hostname": "xxx.xxx.xxx.xxx", + "port": 22, + "username": "tzhu" + }, + "remote_root": "/gpfs/home/tzhu/jzzeng/dpgen_workdir", + "local_root": "./" + }, + "resources": { + "source_list": [ + "activate deepmd" + ], + "envs": { + "KMP_BLOCKTIME": 0, + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" + }, + "batch_type": "lsf", + "group_size": 1, + "number_node": 1, + "cpu_per_node": 0, + "gpu_per_node": 1, + "queue_name": "newgpu", + "kwargs": {} + }, + "command": "/gpfs/home/tzhu/anaconda3/envs/python3.6/bin/dp" }, - "_comment": " that's all " -} + "model_devi": { + "machine": { + "batch_type": "lsf", + "context_type": "ssh", + "remote_profile": { + "hostname": "xxx.xxx.xxx.xxx", + "port": 22, + "username": "tzhu" + }, + "remote_root": "/gpfs/home/tzhu/jzzeng/dpgen_workdir", + "local_root": "./" + }, + "resources": { + "envs": { + "KMP_BLOCKTIME": 0 + }, + "source_list": [ + "activate deepmd" + ], + "batch_type": "lsf", + "group_size": 1, + "number_node": 1, + "cpu_per_node": 0, + "gpu_per_node": 1, + "queue_name": "newgpu", + "kwargs": {} + }, + "command": "mpirun -n 0 /gpfs/home/tzhu/lammps-stable_5Jun2019/src/lmp_intel_cpu_intelmpi -pk intel 0 omp 2" + }, + "fp": { + "machine": { + "batch_type": "pbs", + "context_type": "ssh", + "remote_profile": { + "hostname": "xxx.xxx.xxx.xxx", + "port": 2323, + "username": "tzhu" + }, + "remote_root": "/public/home/tzhu/jzzeng/dpgen_workdir", + "local_root": "./" + }, + "resources": { + "batch_type": "pbs", + "group_size": 1, + "number_node": 1, + "cpu_per_node": 28, + "queue_name": "small", + "custom_flags": [ + "#PBS -l walltime=10:00:00" + ], + "kwargs": {}, + "gpu_per_node": 1 + }, + "command": "/public/home/tzhu/g16/g16 < input || :" + } +} \ No newline at end of file diff --git a/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json b/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json index 5f15303d6..2ff5b4a4b 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json +++ b/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json @@ -1,113 +1,112 @@ - { - "_comment" : "This is an example of DP-GEN on Slurm", - "_comment" : "Last updated on 2021.4.30 for DP-GEN 0.9.2 by Yuzhi Zhang", - "train" : - { - "_comment" : "Specify the installed path of DeePMD-kit", - "_comment" : "The version of DeePMD-kit should be 1.*", - "command": "PATH_TO_DEEPMD/dp", - "_comment" : "Specify machine settings", - "machine": { - "_comment" : "Supported batches include slurm, pbs, shell, lsf.", - "batch": "slurm", - "_comment" : "If your jobs are executed on a local workstation, you can let hostname be localhost.", - "_comment" : "Otherwise you should provide the IP of machine you want to connect via ssh.", - "hostname": "localhost", - "_comment" : "The port for connection, most common settings is 22", - "port": 22, - "_comment" : "Specify your username. Sometimes you may need specify password. Exactly the name of key is password. ", - "username": "USERNAME", - "_comment" : "Specify where you want your job executes, all of tasks will be sent to work_path on this machine.", - "_comment" : "You should alwasy make sure that directory of work_path exits. ", - "work_path": "PATH_TO_WORK", - "_comment": "that's all" - }, - "resources": { - "_comment" : "The number of nodes. This will generate #SBATCH -N 1 in your script. ", - "numb_node": 1, - "_comment" : "The number of GPU cards. #SBATCH --gres=gpu:1", - "numb_gpu": 1, - "_comment" : "The number of CPUs. 
#SBATCH -n 4", - "task_per_node": 4, - "_comment" : "Partition. #SBATCH -p all", - "partition": "all", - "_comment" : "Memory limit. #SBATCH --mem=16G", - "mem_limit": 16, - "_comment" : "Nodelist to be excluded. #SBATCH --exclude=gpu06,gpu07", - "exclude_list": [ - "gpu06", - "gpu07" - ], - "_comment" : "Environment to be activated. This will generate source PATH/train_new.env . ", - "source_list": [ - "PATH/train_new.env" - ], - "_comment" : " Module is a common tools on HPC clustes to manage softwares for multiple users.", - "_comment" : "Modules to be loaded. This will generate module load intel", - "module_list": ["intel"], - "_comment" : "Time limit. ", - "time_limit": "23:0:0", - "_comment": "that's all" - } +{ + "_comment": "Last updated on 2021.4.30 for DP-GEN 0.9.2 by Yuzhi Zhang", + "train": { + "_comment" : "Specify the installed path of DeePMD-kit", + "command": "PATH_TO_DEEPMD/dp", + "_comment" : "Specify machine settings", + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "localhost", + "_comment" : "The port for connection, most common settings is 22", + "port": 22, + "_comment" : "Specify your username.", + "username": "USERNAME" + }, + "_comment" : "You should alwasy make sure that directory of work_path exits. ", + "remote_root": "PATH_TO_WORK", + "local_root": "./" + }, + "resources": { + "_comment" : "Environment to be activated. This will generate source PATH/train_new.env . ", + "source_list": [ + "PATH/train_new.env" + ], + "_comment" : " Module is a common tools on HPC clustes to manage softwares for multiple users.", + "_comment" : "Modules to be loaded. This will generate module load intel", + "module_list": [ + "intel" + ], + "batch_type": "slurm", + "_comment" : "The number of nodes. This will generate #SBATCH -N 1 in your script. ", + "number_node": 1, + "_comment" : "The number of CPUs. #SBATCH -n 4", + "cpu_per_node": 4, + "_comment" : "The number of GPU cards. #SBATCH --gres=gpu:1", + "gpu_per_node": 1, + "queue_name": "all", + "custom_flags": [ + "#SBATCH -t 23:0:0", + "#SBATCH --mem=16G", + "#SBATCH --exclude=gpu06,gpu07" + ], + "kwargs": {}, + "group_size": 1 + } }, - - "model_devi": - { - "machine": { - "machine_type": "slurm", - "hostname": "localhost", - "port": 22, - "username": "USERNAME", - "work_path": "PATH_TO_WORK", - "_comment": "that's all" - }, - "resources": { - "numb_node": 1, - "numb_gpu": 1, - "task_per_node": 4, - "partition": "all", - "mem_limit": 16, - "exclude_list": [ - - ], - "source_list": [ - "PATH/lmp_new.env" - ], - "module_list": [], - "time_limit": "23:0:0", - "_comment": "that's all" - }, - "command": "lmp_serial", - "_comment" : "DP-GEN will put 5 tasks together in one submitting script.", - "group_size": 5 - }, - "fp": - { - "machine": { - "machine_type": "slurm", - "hostname": "xxx.xxx.xxx.xxx", - "port": 22, - "username": "USERNAME", - "work_path": "PATH_TO_WORK" - }, - "resources": { - "task_per_node": 8, - "numb_gpu": 0, - "exclude_list": [], - "_comment" : "If you set with_mpi to true, the defaulted parallelling command of Slurm, srun, will be appended as prefix.", - "_comment" : "If you do not want this, you can set with_mpi to false, and specify parallelling command yourself. ", - "_comment" : "Notice that in json format, the upper/lower case is strict. 
You should write true instead of True and false instead of False", - "with_mpi": false, - "source_list": [ - ], - "module_list": [ - "mpich/3.2.1-intel-2017.1" - ], - "time_limit": "120:0:0", - "partition": "C032M0128G", - "_comment": "that's all" - }, - "command": "mpirun -n 8 /gpfs/share/home/1600017784/yuzhi/soft/QE-mpi/PW/src/pw.x < input", - "group_size": 1 - } -} + "model_devi": { + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "localhost", + "port": 22, + "username": "USERNAME" + }, + "remote_root": "PATH_TO_WORK", + "local_root": "./" + }, + "resources": { + "source_list": [ + "PATH/lmp_new.env" + ], + "module_list": [], + "batch_type": "slurm", + "_comment": "DP-GEN will put 5 tasks together in one submitting script.", + "group_size": 5, + "number_node": 1, + "cpu_per_node": 4, + "gpu_per_node": 1, + "queue_name": "all", + "custom_flags": [ + "#SBATCH -t 23:0:0", + "#SBATCH --mem=16G", + "#SBATCH --exclude=" + ], + "kwargs": {} + }, + "command": "lmp_serial" + }, + "fp": { + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "xxx.xxx.xxx.xxx", + "port": 22, + "username": "USERNAME" + }, + "remote_root": "PATH_TO_WORK", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [ + "mpich/3.2.1-intel-2017.1" + ], + "batch_type": "slurm", + "group_size": 1, + "cpu_per_node": 8, + "gpu_per_node": 0, + "queue_name": "C032M0128G", + "custom_flags": [ + "#SBATCH -t 120:0:0" + ], + "kwargs": {}, + "number_node": 1 + }, + "command": "mpirun -n 8 /gpfs/share/home/1600017784/yuzhi/soft/QE-mpi/PW/src/pw.x < input" + }, + "api_version": "1.0" +} \ No newline at end of file From 2e82464b38759c65673e309430b7547d04c46fd8 Mon Sep 17 00:00:00 2001 From: HuangJiameng <105633685+HuangJiameng@users.noreply.github.com> Date: Fri, 1 Jul 2022 13:23:55 +0800 Subject: [PATCH 11/26] =?UTF-8?q?fix=20=E2=80=98post=5Ffp=5Fcp2k=E2=80=99,?= =?UTF-8?q?=20add=20param=20rfailed=20(#765)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix ‘post_fp_cp2k’, add param rfailed * Update run.py --- dpgen/generator/run.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 1bd196cc6..f716e2266 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -3342,7 +3342,10 @@ def post_fp_gaussian (iter_index, def post_fp_cp2k (iter_index, - jdata): + jdata, + rfailed=None): + + ratio_failed = rfailed if rfailed else jdata.get('ratio_failed',0.10) model_devi_jobs = jdata['model_devi_jobs'] assert (iter_index < len(model_devi_jobs)) @@ -3373,7 +3376,7 @@ def post_fp_cp2k (iter_index, all_sys = None for oo in sys_output : _sys = dpdata.LabeledSystem(oo, fmt = 'cp2k/output') - _sys.check_type_map(type_map = jdata['type_map']) + #_sys.check_type_map(type_map = jdata['type_map']) if all_sys is None: all_sys = _sys else: @@ -3385,8 +3388,12 @@ def post_fp_cp2k (iter_index, sys_data_path = os.path.join(work_path, 'data.%s'%ss) all_sys.to_deepmd_raw(sys_data_path) all_sys.to_deepmd_npy(sys_data_path, set_size = len(sys_output)) - dlog.info("failed frame number: %s "%(tcount-icount)) - dlog.info("total frame number: %s "%tcount) + + rfail=float(tcount - icount)/float(tcount) + dlog.info("failed frame: %6d in %6d %6.2f %% " % (tcount - icount, tcount, rfail * 100.)) + + if rfail>ratio_failed: + raise RuntimeError("find too many unsuccessfully terminated jobs. 
Too many FP tasks are not converged. Please check your files in directories \'iter.*.*/02.fp/task.*.*/.\'") def post_fp_pwmat (iter_index, From 4948c81e33a40f1b10017e2c6066b90ad4624055 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 9 Jun 2022 00:51:49 -0400 Subject: [PATCH 12/26] refactor simplify (#730) * refactor simplify 1. Used `dp model-devi` to calculate model deviation, instead of local calculation. Supported by deepmodeling/deepmd-kit#1618, released in v2.1.1. So the version earlier than 2.1.1 is not supported any more. 2. Assumed all systems are MultiSystems. 3. Removed energy model deviation support * expand path when getting multisystems * let `make_train` and `run_train` expand paths * load numpy array instead * use dpdata to get nframes * fix tests * update README --- README.md | 6 +- dpgen/generator/run.py | 70 +++---- dpgen/simplify/simplify.py | 381 +++++++++---------------------------- dpgen/util.py | 22 +++ 4 files changed, 140 insertions(+), 339 deletions(-) diff --git a/README.md b/README.md index b59725ec9..c833ed059 100644 --- a/README.md +++ b/README.md @@ -499,9 +499,8 @@ The bold notation of key (such aas **type_map**) means that it's a necessary key | **use_ele_temp** | int | 0 | Currently only support fp_style vasp. 0(default): no electron temperature. 1: eletron temperature as frame parameter. 2: electron temperature as atom parameter. | *#Data* | init_data_prefix | String | "/sharedext4/.../data/" | Prefix of initial data directories - | ***init_data_sys*** | List of string|["CH4.POSCAR.01x01x01/.../deepmd"] |Directories of initial data. You may use either absolute or relative path here. + | ***init_data_sys*** | List of string|["CH4.POSCAR.01x01x01/.../deepmd"] |Directories of initial data. You may use either absolute or relative path here. Systems will be detected recursively in the directories. | ***sys_format*** | String | "vasp/poscar" | Format of initial data. It will be `vasp/poscar` if not set. - | init_multi_systems | Boolean | false | If set to `true`, `init_data_sys` directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as inital data systems. | init_batch_size | String of integer | [8] | Each number is the batch_size of corresponding system for training in `init_data_sys`. One recommended rule for setting the `sys_batch_size` and `init_batch_size` is that `batch_size` mutiply number of atoms ot the stucture should be larger than 32. If set to `auto`, batch size will be 32 divided by number of atoms. | | sys_configs_prefix | String | "/sharedext4/.../data/" | Prefix of `sys_configs` | **sys_configs** | List of list of string | [
["/sharedext4/.../POSCAR"],
["....../POSCAR"]
] | Containing directories of structures to be explored in iterations.Wildcard characters are supported here. | @@ -1086,7 +1085,6 @@ Here is an example of `param.json` for QM7 dataset: }, "_comment": "that's all" }, - "use_clusters": true, "fp_style": "gaussian", "shuffle_poscar": false, "fp_task_max": 1000, @@ -1109,7 +1107,7 @@ Here is an example of `param.json` for QM7 dataset: } ``` -Here `pick_data` is the data to simplify and currently only supports `MultiSystems` containing `System` with `deepmd/npy` format, and `use_clusters` should always be `true`. `init_pick_number` and `iter_pick_number` are the numbers of picked frames. `e_trust_lo`, `e_trust_hi` mean the range of the deviation of the frame energy, and `f_trust_lo` and `f_trust_hi` mean the range of the max deviation of atomic forces in a frame. `fp_style` can only be `gaussian` currently. Other parameters are as the same as those of generator. +Here `pick_data` is the directory to data to simplify where the program recursively detects systems `System` with `deepmd/npy` format. `init_pick_number` and `iter_pick_number` are the numbers of picked frames. `e_trust_lo`, `e_trust_hi` mean the range of the deviation of the frame energy, and `f_trust_lo` and `f_trust_hi` mean the range of the max deviation of atomic forces in a frame. `fp_style` can only be `gaussian` currently. Other parameters are as the same as those of generator. ## Set up machine diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index d06c137b3..1bd196cc6 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -61,7 +61,7 @@ from dpgen.generator.lib.ele_temp import NBandsEsti from dpgen.remote.decide_machine import convert_mdata from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks, make_dispatcher, make_submission -from dpgen.util import sepline +from dpgen.util import sepline, expand_sys_str from dpgen import ROOT_PATH from pymatgen.io.vasp import Incar,Kpoints,Potcar from dpgen.auto_test.lib.vasp import make_kspacing_kpoints @@ -288,13 +288,10 @@ def make_train (iter_index, # make sure all init_data_sys has the batch size -- for the following `zip` assert (len(init_data_sys_) <= len(init_batch_size_)) for ii, ss in zip(init_data_sys_, init_batch_size_) : - if jdata.get('init_multi_systems', False): - for single_sys in os.listdir(os.path.join(work_path, 'data.init', ii)): - init_data_sys.append(os.path.join('..', 'data.init', ii, single_sys)) - init_batch_size.append(detect_batch_size(ss, os.path.join(work_path, 'data.init', ii, single_sys))) - else: - init_data_sys.append(os.path.join('..', 'data.init', ii)) - init_batch_size.append(detect_batch_size(ss, os.path.join(work_path, 'data.init', ii))) + sys_paths = expand_sys_str(os.path.join(init_data_prefix, ii)) + for single_sys in sys_paths: + init_data_sys.append(os.path.normpath(os.path.join('..', 'data.init', ii, os.path.relpath(single_sys, os.path.join(init_data_prefix, ii))))) + init_batch_size.append(detect_batch_size(ss, single_sys)) old_range = None if iter_index > 0 : for ii in range(iter_index) : @@ -308,25 +305,16 @@ def make_train (iter_index, sys_batch_size = ["auto" for aa in range(len(sys_list))] for jj in fp_data_sys : sys_idx = int(jj.split('.')[-1]) - if jdata.get('use_clusters', False): - nframes = 0 - for sys_single in os.listdir(jj): - tmp_box = np.loadtxt(os.path.join(jj, sys_single, 'box.raw')) - tmp_box = np.reshape(tmp_box, [-1,9]) - nframes += tmp_box.shape[0] - if nframes < fp_task_min : - log_task('nframes (%d) in data sys %s is too small, skip' % 
(nframes, jj)) - continue - for sys_single in os.listdir(jj): - init_data_sys.append(os.path.join('..', 'data.iters', jj, sys_single)) - init_batch_size.append(detect_batch_size(sys_batch_size[sys_idx], os.path.join(jj, sys_single))) - else: - nframes = dpdata.System(jj, 'deepmd/npy').get_nframes() - if nframes < fp_task_min : - log_task('nframes (%d) in data sys %s is too small, skip' % (nframes, jj)) - continue - init_data_sys.append(os.path.join('..', 'data.iters', jj)) - init_batch_size.append(detect_batch_size(sys_batch_size[sys_idx], jj)) + sys_paths = expand_sys_str(jj) + nframes = 0 + for sys_single in sys_paths: + nframes += dpdata.LabeledSystem(sys_single, fmt="deepmd/npy").get_nframes() + if nframes < fp_task_min : + log_task('nframes (%d) in data sys %s is too small, skip' % (nframes, jj)) + continue + for sys_single in sys_paths: + init_data_sys.append(os.path.normpath(os.path.join('..', 'data.iters', sys_single))) + init_batch_size.append(detect_batch_size(sys_batch_size[sys_idx], sys_single)) # establish tasks jinput = jdata['default_training_param'] try: @@ -568,25 +556,17 @@ def run_train (iter_index, os.chdir(work_path) fp_data = glob.glob(os.path.join('data.iters', 'iter.*', '02.fp', 'data.*')) for ii in init_data_sys : - if jdata.get('init_multi_systems', False): - for single_sys in os.listdir(os.path.join(ii)): - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'set.*')) - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'type*.raw')) - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'nopbc')) - else: - trans_comm_data += glob.glob(os.path.join(ii, 'set.*')) - trans_comm_data += glob.glob(os.path.join(ii, 'type*.raw')) - trans_comm_data += glob.glob(os.path.join(ii, 'nopbc')) + sys_paths = expand_sys_str(ii) + for single_sys in sys_paths: + trans_comm_data += glob.glob(os.path.join(single_sys, 'set.*')) + trans_comm_data += glob.glob(os.path.join(single_sys, 'type*.raw')) + trans_comm_data += glob.glob(os.path.join(single_sys, 'nopbc')) for ii in fp_data : - if jdata.get('use_clusters', False): - for single_sys in os.listdir(os.path.join(ii)): - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'set.*')) - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'type*.raw')) - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'nopbc')) - else: - trans_comm_data += glob.glob(os.path.join(ii, 'set.*')) - trans_comm_data += glob.glob(os.path.join(ii, 'type*.raw')) - trans_comm_data += glob.glob(os.path.join(ii, 'nopbc')) + sys_paths = expand_sys_str(ii) + for single_sys in sys_paths: + trans_comm_data += glob.glob(os.path.join(single_sys, 'set.*')) + trans_comm_data += glob.glob(os.path.join(single_sys, 'type*.raw')) + trans_comm_data += glob.glob(os.path.join(single_sys, 'nopbc')) os.chdir(cwd) try: diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 982db3114..529401519 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -9,6 +9,7 @@ 02: fp (optional, if the original dataset do not have fp data, same as generator) """ import logging +import warnings import queue import os import json @@ -21,7 +22,7 @@ from dpgen import dlog from dpgen import SHORT_CMD -from dpgen.util import sepline +from dpgen.util import sepline, expand_sys_str from distutils.version import LooseVersion from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks, make_dispatcher, make_submission from dpgen.generator.run import make_train, run_train, post_train, run_fp, post_fp, fp_name, model_devi_name, 
train_name, train_task_fmt, sys_link_fp_vasp_pp, make_fp_vasp_incar, make_fp_vasp_kp, make_fp_vasp_cp_cvasp, data_system_fmt, model_devi_task_fmt, fp_task_fmt @@ -38,17 +39,6 @@ sys_name_fmt = 'sys.' + data_system_fmt sys_name_pattern = 'sys.[0-9]*[0-9]' -def expand_sys_str(root_dir): - matches = [] - for root, dirnames, filenames in os.walk(root_dir, followlinks=True): - for filename in fnmatch.filter(filenames, 'type.raw'): - matches.append(root) - matches.sort() - dirnames = [os.path.basename(ii) for ii in matches] - if (len(list(set(dirnames))) != len(matches)) : - raise RuntimeError('duplicated system name: it is highly recommend to place all systems in the same level of directory and has different names') - return matches - def get_system_cls(jdata): if jdata.get("labeled", False): @@ -58,28 +48,12 @@ def get_system_cls(jdata): def get_multi_system(path, jdata): system = get_system_cls(jdata) + system_paths = expand_sys_str(path) systems = dpdata.MultiSystems( - *[system(os.path.join(path, s), fmt='deepmd/npy') for s in os.listdir(path)]) - return systems - - -def get_systems(path, jdata): - system_cls = get_system_cls(jdata) - system_paths = expand_sys_str(path) - systems = {} - for ii in system_paths: - systems[os.path.basename(ii)] = system_cls(ii, fmt='deepmd/npy') + *[system(s, fmt='deepmd/npy') for s in system_paths]) return systems -def get_system_idx(path): - system_paths = expand_sys_str(path) - sys_idx_map = {} - for idx,ii in enumerate(system_paths): - sys_idx_map[os.path.basename(ii)] = idx - return sys_idx_map - - def init_model(iter_index, jdata, mdata): training_init_model = jdata.get('training_init_model', False) if not training_init_model: @@ -111,20 +85,13 @@ def init_pick(iter_index, jdata, mdata): """pick up init data from dataset randomly""" pick_data = jdata['pick_data'] init_pick_number = jdata['init_pick_number'] - use_clusters = jdata.get('use_clusters', False) # use MultiSystems with System # TODO: support System and LabeledSystem # TODO: support other format - if use_clusters: - systems = get_multi_system(pick_data, jdata) - else: - systems = get_systems(pick_data, jdata) + systems = get_multi_system(pick_data, jdata) # label the system labels = [] - if use_clusters: - items = systems.systems.items() - else: - items = systems.items() + items = systems.systems.items() for key, system in items: labels.extend([(key, j) for j in range(len(system))]) @@ -146,48 +113,18 @@ def init_pick(iter_index, jdata, mdata): _init_dump_selected_frames(systems, labels, rest_idx, sys_data_path, jdata) -def _add_system(systems, key, system): - if key in systems.keys(): - systems[key].append(system) - else: - systems[key] = system - return systems - - def _init_dump_selected_frames(systems, labels, selc_idx, sys_data_path, jdata): - pick_data = jdata['pick_data'] - use_clusters = jdata.get('use_clusters', False) - if use_clusters: - selc_systems = dpdata.MultiSystems() - for j in selc_idx: - sys_name, sys_id = labels[j] - selc_systems.append(systems[sys_name][sys_id]) - selc_systems.to_deepmd_raw(sys_data_path) - selc_systems.to_deepmd_npy(sys_data_path, set_size=selc_idx.size) - else: - selc_systems = {} - for j in selc_idx: - sys_name, sys_id = labels[j] - selc_systems = _add_system(selc_systems, sys_name, systems[sys_name][sys_id]) - sys_idx_map = get_system_idx(pick_data) - for kk in selc_systems.keys(): - sub_path = os.path.join(sys_data_path, sys_name_fmt % sys_idx_map[kk]) - selc_systems[kk].to_deepmd_raw(sub_path) - selc_systems[kk].to_deepmd_npy(sub_path, 
set_size=selc_idx.size) - with open(os.path.join(sys_data_path, 'sys_idx_map.json'), 'w') as fp: - json.dump(sys_idx_map, fp, indent=4) - -def _dump_system_dict(systems, path): - for kk in systems: - sub_path = os.path.join(path, sys_name_fmt % (int(kk))) - systems[kk].to_deepmd_raw(sub_path) - systems[kk].to_deepmd_npy(sub_path, set_size=systems[kk].get_nframes()) + selc_systems = dpdata.MultiSystems() + for j in selc_idx: + sys_name, sys_id = labels[j] + selc_systems.append(systems[sys_name][sys_id]) + selc_systems.to_deepmd_raw(sys_data_path) + selc_systems.to_deepmd_npy(sys_data_path, set_size=selc_idx.size) def make_model_devi(iter_index, jdata, mdata): """calculate the model deviation of the rest idx""" pick_data = jdata['pick_data'] - use_clusters = jdata.get('use_clusters', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, model_devi_name) create_path(work_path) @@ -203,25 +140,7 @@ def make_model_devi(iter_index, jdata, mdata): rest_data_path = os.path.join(last_iter_name, model_devi_name, rest_data_name) if not os.path.exists(rest_data_path): return False - if use_clusters: - for jj, subsystem in enumerate(os.listdir(rest_data_path)): - task_name = "task." + model_devi_task_fmt % (0, jj) - task_path = os.path.join(work_path, task_name) - create_path(task_path) - os.symlink(os.path.abspath(os.path.join(rest_data_path, subsystem)), - os.path.abspath(os.path.join(task_path, rest_data_name))) - else: - rest_data_path = os.path.abspath(rest_data_path) - sys_path = glob.glob(os.path.join(rest_data_path, sys_name_pattern)) - cwd = os.getcwd() - for ii in sys_path: - task_name = "task." + model_devi_task_fmt % (int(os.path.basename(ii).split('.')[1]), 0) - task_path = os.path.join(work_path, task_name) - create_path(task_path) - os.chdir(task_path) - os.symlink(os.path.relpath(ii), rest_data_name) - os.chdir(cwd) - os.chdir(cwd) + os.symlink(os.path.abspath(rest_data_path), os.path.join(work_path, rest_data_name + ".old")) return True @@ -231,43 +150,28 @@ def run_model_devi(iter_index, jdata, mdata): work_path = os.path.join(iter_name, model_devi_name) # generate command commands = [] - tasks = glob.glob(os.path.join(work_path, "task.*")) - run_tasks = [os.path.basename(ii) for ii in tasks] + run_tasks = ["."] # get models models = glob.glob(os.path.join(work_path, "graph*pb")) model_names = [os.path.basename(ii) for ii in models] task_model_list = [] for ii in model_names: - task_model_list.append(os.path.join('..', ii)) - # get max data size - data_size = max([len(dpdata.System(os.path.join( - task, rest_data_name), fmt="deepmd/npy")) for task in tasks]) + task_model_list.append(os.path.join('.', ii)) # models commands = [] - detail_file_names = [] - for ii, mm in enumerate(task_model_list): - detail_file_name = "{prefix}-{ii}".format( - prefix=detail_file_name_prefix, - ii=ii, - ) - # TODO: support 0.x? 
- command = "{python} -m deepmd test -m {model} -s {system} -n {numb_test} -d {detail_file}".format( - python=mdata['python_test_path'], - model=mm, - system=rest_data_name, - numb_test=data_size, - detail_file=detail_file_name, - ) - commands.append(command) - detail_file_names.append(detail_file_name) + detail_file_name = detail_file_name_prefix + command = "{dp} model-devi -m {model} -s {system} -o {detail_file}".format( + dp=mdata.get('model_devi_command', 'dp'), + model=" ".join(task_model_list), + system=rest_data_name + ".old", + detail_file=detail_file_name, + ) + commands = [command] # submit - try: - model_devi_group_size = mdata['model_devi_group_size'] - except Exception: - model_devi_group_size = 1 + model_devi_group_size = mdata.get('model_devi_group_size', 1) - forward_files = [rest_data_name] - backward_files = sum([[pf+".e.out", pf+".f.out", pf+".v.out"] for pf in detail_file_names], []) + forward_files = [rest_data_name + ".old"] + backward_files = [detail_file_name] api_version = mdata.get('api_version', '0.9') if LooseVersion(api_version) < LooseVersion('1.0'): @@ -303,102 +207,50 @@ def run_model_devi(iter_index, jdata, mdata): def post_model_devi(iter_index, jdata, mdata): """calculate the model deviation""" - use_clusters = jdata.get('use_clusters', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, model_devi_name) - tasks = glob.glob(os.path.join(work_path, "task.*")) - tasks.sort() - - e_trust_lo = jdata['e_trust_lo'] - e_trust_hi = jdata['e_trust_hi'] - f_trust_lo = jdata['f_trust_lo'] - f_trust_hi = jdata['f_trust_hi'] - - if use_clusters: - sys_accurate = dpdata.MultiSystems() - sys_candinate = dpdata.MultiSystems() - sys_failed = dpdata.MultiSystems() - else: - sys_accurate = {} - sys_candinate = {} - sys_failed = {} - all_names = set() - - for task in tasks: - if not use_clusters: - sys_name = os.path.basename(task).split('.')[1] - all_names.add(sys_name) - # e.out - details_e = glob.glob(os.path.join(task, "{}-*.e.out".format(detail_file_name_prefix))) - e_all = np.array([np.loadtxt(detail_e, ndmin=2)[:, 1] for detail_e in details_e]) - e_std = np.std(e_all, axis=0) - n_frame = e_std.size - - # f.out - details_f = glob.glob(os.path.join(task, "{}-*.f.out".format(detail_file_name_prefix))) - f_all = np.array([np.loadtxt(detail_f, ndmin=2)[:, 3:6].reshape((n_frame, -1, 3)) for detail_f in details_f]) - # (n_model, n_frame, n_atom, 3) - f_std = np.std(f_all, axis=0) - # (n_frame, n_atom, 3) - f_std = np.linalg.norm(f_std, axis=2) - # (n_frame, n_atom) - f_std = np.max(f_std, axis=1) - # (n_frame,) - - system_cls = get_system_cls(jdata) - for subsys, e_devi, f_devi in zip(system_cls(os.path.join(task, rest_data_name), fmt='deepmd/npy'), e_std, f_std): - if (e_devi < e_trust_hi and e_devi >= e_trust_lo) or (f_devi < f_trust_hi and f_devi >= f_trust_lo) : - if use_clusters: + + f_trust_lo = jdata['model_devi_f_trust_lo'] + f_trust_hi = jdata['model_devi_f_trust_hi'] + + sys_accurate = dpdata.MultiSystems() + sys_candinate = dpdata.MultiSystems() + sys_failed = dpdata.MultiSystems() + + sys_entire = dpdata.MultiSystems().from_deepmd_npy(os.path.join(work_path, rest_data_name + ".old")) + + detail_file_name = detail_file_name_prefix + with open(os.path.join(work_path, detail_file_name)) as f: + for line in f: + if line.startswith("# data.rest.old"): + name = (line.split()[1]).split("/")[-1] + elif line.startswith("#"): + pass + else: + idx = int(line.split()[0]) + f_devi = float(line.split()[4]) + subsys = sys_entire[name][idx] + if 
f_trust_lo <= f_devi < f_trust_hi: sys_candinate.append(subsys) - else: - sys_candinate = _add_system(sys_candinate, sys_name, subsys) - elif (e_devi >= e_trust_hi ) or (f_devi >= f_trust_hi ): - if use_clusters: + elif f_devi >= f_trust_hi: sys_failed.append(subsys) - else: - sys_failed = _add_system(sys_failed, sys_name, subsys) - elif (e_devi < e_trust_lo and f_devi < f_trust_lo ): - if use_clusters: + elif f_devi < f_trust_lo: sys_accurate.append(subsys) else: - sys_accurate = _add_system(sys_accurate, sys_name, subsys) - else: - raise RuntimeError('reach a place that should NOT be reached...') - if use_clusters: - counter = {"candidate": sys_candinate.get_nframes(), "accurate": sys_accurate.get_nframes(), "failed": sys_failed.get_nframes()} - fp_sum = sum(counter.values()) - for cc_key, cc_value in counter.items(): - dlog.info("{0:9s} : {1:6d} in {2:6d} {3:6.2f} %".format(cc_key, cc_value, fp_sum, cc_value/fp_sum*100)) - else: - all_names = list(all_names) - all_names.sort() - counter = {"candidate": 0, "accurate": 0, "failed": 0} - for kk in all_names: - sys_counter = {"candidate": 0, "accurate": 0, "failed": 0} - if kk in sys_candinate.keys(): - sys_counter['candidate'] += sys_candinate[kk].get_nframes() - if kk in sys_accurate.keys(): - sys_counter['accurate'] += sys_accurate[kk].get_nframes() - if kk in sys_failed.keys(): - sys_counter['failed'] += sys_failed[kk].get_nframes() - fp_sum = sum(sys_counter.values()) - for cc_key, cc_value in sys_counter.items(): - if fp_sum != 0: - dlog.info("sys{0:s} {1:9s} : {2:6d} in {3:6d} {4:6.2f} %".format(kk, cc_key, cc_value, fp_sum, cc_value/fp_sum*100)) - else: - dlog.info("sys{0:s} {1:9s} : {2:6d} in {3:6d} {4:6.2f} %".format(kk, cc_key, cc_value, fp_sum, 0*100)) - for ii in ['candidate', 'accurate', 'failed']: - counter[ii] += sys_counter[ii] + raise RuntimeError('reach a place that should NOT be reached...') + + counter = {"candidate": sys_candinate.get_nframes(), "accurate": sys_accurate.get_nframes(), "failed": sys_failed.get_nframes()} + fp_sum = sum(counter.values()) + for cc_key, cc_value in counter.items(): + dlog.info("{0:9s} : {1:6d} in {2:6d} {3:6.2f} %".format(cc_key, cc_value, fp_sum, cc_value/fp_sum*100)) if counter['candidate'] == 0 and counter['failed'] > 0: raise RuntimeError('no candidate but still have failed cases, stop. 
You may want to refine the training or to increase the trust level hi') # label the candidate system labels = [] - if use_clusters: - items = sys_candinate.systems.items() - else: - items = sys_candinate.items() + items = sys_candinate.systems.items() + for key, system in items: labels.extend([(key, j) for j in range(len(system))]) # candinate: pick up randomly @@ -412,112 +264,61 @@ def post_model_devi(iter_index, jdata, mdata): (counter['candidate'], len(pick_idx), float(len(pick_idx))/counter['candidate']*100., len(rest_idx), float(len(rest_idx))/counter['candidate']*100.)) # dump the picked candinate data - if use_clusters: - picked_systems = dpdata.MultiSystems() - for j in pick_idx: - sys_name, sys_id = labels[j] - picked_systems.append(sys_candinate[sys_name][sys_id]) - sys_data_path = os.path.join(work_path, picked_data_name) - picked_systems.to_deepmd_raw(sys_data_path) - picked_systems.to_deepmd_npy(sys_data_path, set_size=iter_pick_number) - else: - selc_systems = {} - for j in pick_idx: - sys_name, sys_id = labels[j] - selc_systems = _add_system(selc_systems, sys_name, sys_candinate[sys_name][sys_id]) - sys_data_path = os.path.join(work_path, picked_data_name) - _dump_system_dict(selc_systems, sys_data_path) + picked_systems = dpdata.MultiSystems() + for j in pick_idx: + sys_name, sys_id = labels[j] + picked_systems.append(sys_candinate[sys_name][sys_id]) + sys_data_path = os.path.join(work_path, picked_data_name) + picked_systems.to_deepmd_raw(sys_data_path) + picked_systems.to_deepmd_npy(sys_data_path, set_size=iter_pick_number) + # dump the rest data (not picked candinate data and failed data) - if use_clusters: - rest_systems = dpdata.MultiSystems() - for j in rest_idx: - sys_name, sys_id = labels[j] - rest_systems.append(sys_candinate[sys_name][sys_id]) - rest_systems += sys_failed - sys_data_path = os.path.join(work_path, rest_data_name) - rest_systems.to_deepmd_raw(sys_data_path) + rest_systems = dpdata.MultiSystems() + for j in rest_idx: + sys_name, sys_id = labels[j] + rest_systems.append(sys_candinate[sys_name][sys_id]) + rest_systems += sys_failed + sys_data_path = os.path.join(work_path, rest_data_name) + rest_systems.to_deepmd_raw(sys_data_path) + if rest_idx.size: rest_systems.to_deepmd_npy(sys_data_path, set_size=rest_idx.size) - else: - selc_systems = {} - for j in rest_idx: - sys_name, sys_id = labels[j] - selc_systems = _add_system(selc_systems, sys_name, sys_candinate[sys_name][sys_id]) - for kk in sys_failed.keys(): - selc_systems = _add_system(selc_systems, kk, sys_failed[kk]) - sys_data_path = os.path.join(work_path, rest_data_name) - _dump_system_dict(selc_systems, sys_data_path) + # dump the accurate data -- to another directory - if use_clusters: - sys_data_path = os.path.join(work_path, accurate_data_name) - sys_accurate.to_deepmd_raw(sys_data_path) - sys_accurate.to_deepmd_npy(sys_data_path, set_size=sys_accurate.get_nframes()) - else: - sys_data_path = os.path.join(work_path, accurate_data_name) - _dump_system_dict(sys_accurate, sys_data_path) + sys_data_path = os.path.join(work_path, accurate_data_name) + sys_accurate.to_deepmd_raw(sys_data_path) + sys_accurate.to_deepmd_npy(sys_data_path, set_size=sys_accurate.get_nframes()) def make_fp_labeled(iter_index, jdata): dlog.info("already labeled, skip make_fp and link data directly") pick_data = jdata['pick_data'] - use_clusters = jdata.get('use_clusters', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, fp_name) create_path(work_path) picked_data_path = 
os.path.join(iter_name, model_devi_name, picked_data_name) - if use_clusters: - os.symlink(os.path.abspath(picked_data_path), os.path.abspath( - os.path.join(work_path, "task." + data_system_fmt % 0))) - os.symlink(os.path.abspath(picked_data_path), os.path.abspath( - os.path.join(work_path, "data." + data_system_fmt % 0))) - else: - picked_data_path = os.path.abspath(picked_data_path) - sys_path = glob.glob(os.path.join(picked_data_path, sys_name_pattern)) - cwd = os.getcwd() - os.chdir(work_path) - for ii in sys_path: - sys_idx = os.path.basename(ii).split('.')[1] - data_dir = 'data.' + data_system_fmt % int(sys_idx) - task_dir = 'task.' + data_system_fmt % int(sys_idx) - os.symlink(os.path.relpath(ii), data_dir) - os.symlink(os.path.relpath(ii), task_dir) - os.chdir(cwd) + os.symlink(os.path.abspath(picked_data_path), os.path.abspath( + os.path.join(work_path, "task." + data_system_fmt % 0))) + os.symlink(os.path.abspath(picked_data_path), os.path.abspath( + os.path.join(work_path, "data." + data_system_fmt % 0))) def make_fp_configs(iter_index, jdata): pick_data = jdata['pick_data'] - use_clusters = jdata.get('use_clusters', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, fp_name) create_path(work_path) picked_data_path = os.path.join(iter_name, model_devi_name, picked_data_name) - if use_clusters: - systems = get_multi_system(picked_data_path, jdata) - jj = 0 - for system in systems: - for subsys in system: - task_name = "task." + fp_task_fmt % (0, jj) - task_path = os.path.join(work_path, task_name) - create_path(task_path) - subsys.to('vasp/poscar', os.path.join(task_path, 'POSCAR')) - jj += 1 - else: - picked_data_path = os.path.abspath(picked_data_path) - sys_path = glob.glob(os.path.join(picked_data_path, sys_name_pattern)) - for ii in sys_path: - tmp_sys = dpdata.System(ii, fmt = 'deepmd/npy') - sys_idx = os.path.basename(ii).split('.')[1] - jj = 0 - for ss in tmp_sys: - task_name = "task." + fp_task_fmt % (int(sys_idx), jj) - task_path = os.path.join(work_path, task_name) - create_path(task_path) - ss.to('vasp/poscar', os.path.join(task_path, 'POSCAR')) - job = {} - with open(os.path.join(task_path, 'job.json'), 'w') as fp: - json.dump(job, fp, indent=4) - jj += 1 + systems = get_multi_system(picked_data_path, jdata) + jj = 0 + for system in systems: + for subsys in system: + task_name = "task." + fp_task_fmt % (0, jj) + task_path = os.path.join(work_path, task_name) + create_path(task_path) + subsys.to('vasp/poscar', os.path.join(task_path, 'POSCAR')) + jj += 1 def make_fp_gaussian(iter_index, jdata): diff --git a/dpgen/util.py b/dpgen/util.py index aa805e7e5..9491cdc30 100644 --- a/dpgen/util.py +++ b/dpgen/util.py @@ -1,5 +1,7 @@ #!/usr/bin/env python # coding: utf-8 +from typing import Union, List +from pathlib import Path from dpgen import dlog @@ -25,3 +27,23 @@ def box_center(ch='',fill=' ',sp="|"): ''' strs=ch.center(Len,fill) dlog.info(sp+strs[1:len(strs)-1:]+sp) + + +def expand_sys_str(root_dir: Union[str, Path]) -> List[str]: + """Recursively iterate over directories taking those that contain `type.raw` file. 
+ + Parameters + ---------- + root_dir : Union[str, Path] + starting directory + + Returns + ------- + List[str] + list of string pointing to system directories + """ + root_dir = Path(root_dir) + matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()] + if (root_dir / "type.raw").is_file(): + matches.append(str(root_dir)) + return matches From 8a27df371211264bcc302ae05b88fd9058c4f1b8 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 9 Jun 2022 00:53:16 -0400 Subject: [PATCH 13/26] supports non-list mdata (#748) * supports non-list mdata The mdata of a task is a list of a single dict. This "list" looks useless and not clear enough. So this commit supports using the dict without a list. Note that old parameters are still supported, so no breaking changes are made. The "list" is just removed from all examples. Both list and non-list parameters are in the unittest. * fix typo --- README.md | 26 +++++++------------ doc/run/example-of-machine.md | 11 +++----- dpgen/remote/decide_machine.py | 13 +++++++--- .../machine/DeePMD-kit-1.x/machine-ali.json | 13 ++++------ .../machine-lsf-slurm-cp2k.json | 13 ++++------ .../DeePMD-kit-1.x/machine-slurm-qe.json | 12 +++------ .../DeePMD-kit-2.x/lebesgue_v2_machine.json | 11 ++++---- tests/tools/machine_fp_single2.json | 14 ++++++++++ tests/tools/test_convert_mdata.py | 10 +++++-- 9 files changed, 65 insertions(+), 58 deletions(-) create mode 100644 tests/tools/machine_fp_single2.json diff --git a/README.md b/README.md index c833ed059..fb280e226 100644 --- a/README.md +++ b/README.md @@ -1137,7 +1137,7 @@ an example of new dpgen's machine.json ```json { "api_version": "1.0", - "train": [ + "train": { "command": "dp", "machine": { @@ -1161,9 +1161,8 @@ an example of new dpgen's machine.json "para_deg": 3, "source_list": ["/home/user1234/deepmd.1.2.4.env"] } - } - ], - "model_devi":[ + }, + "model_devi": { "command": "lmp", "machine":{ @@ -1184,9 +1183,8 @@ an example of new dpgen's machine.json "group_size": 5, "source_list": ["/home/user1234/deepmd.1.2.4.env"] } - } - ], - "fp":[ + }, + "fp": { "command": "vasp_std", "machine":{ @@ -1208,7 +1206,6 @@ an example of new dpgen's machine.json "source_list": ["~/vasp.env"] } } - ] } ``` note1: the key "local_root" in dpgen's machine.json is always `./` @@ -1220,7 +1217,7 @@ When switching into a new machine, you may modifying the `MACHINE`, according to An example for `MACHINE` is: ```json { - "train": [ + "train": { "machine": { "batch": "slurm", @@ -1243,9 +1240,8 @@ An example for `MACHINE` is: "qos": "data" }, "command": "USERPATH/dp" - } - ], - "model_devi": [ + }, + "model_devi": { "machine": { "batch": "slurm", @@ -1269,9 +1265,8 @@ An example for `MACHINE` is: }, "command": "lmp_serial", "group_size": 1 - } - ], - "fp": [ + }, + "fp": { "machine": { "batch": "slurm", @@ -1298,7 +1293,6 @@ An example for `MACHINE` is: "command": "vasp_gpu", "group_size": 1 } - ] } ``` Following table illustrates which key is needed for three types of machine: `train`,`model_devi` and `fp`. Each of them is a list of dicts. Each dict can be considered as an independent environmnet for calculation. diff --git a/doc/run/example-of-machine.md b/doc/run/example-of-machine.md index 569f85026..247c50e4f 100644 --- a/doc/run/example-of-machine.md +++ b/doc/run/example-of-machine.md @@ -20,7 +20,7 @@ In this section, we will show you how to perform train task at a local workstati In this example, we perform the `train` task on a local workstation. 
```json -"train": [ +"train": { "command": "dp", "machine": { @@ -36,8 +36,7 @@ In this example, we perform the `train` task on a local workstation. "group_size": 1, "source_list": ["/home/user1234/deepmd.env"] } - } - ], + }, ``` The "command" for the train task in the DeePMD-kit is "dp". @@ -51,7 +50,7 @@ In the resources parameter, "number_node", "cpu_per_node", and "gpu_per_node" sp In this example, we perform the model_devi task at a local Slurm workstation. ```json -"model_devi": [ +"model_devi": { "command": "lmp", "machine": { @@ -70,7 +69,6 @@ In this example, we perform the model_devi task at a local Slurm workstation. "source_list": ["/home/user1234/lammps.env"] } } -], ``` The "command" for the model_devi task in the LAMMPS is "lmp". @@ -84,7 +82,7 @@ In the resources parameter, we specify the name of the queue to which the task i In this example, we perform the fp task at a remote PBS cluster that can be accessed via SSH. ```json -"fp": [ +"fp": { "command": "mpirun -n 32 vasp_std", "machine": { @@ -106,7 +104,6 @@ In this example, we perform the fp task at a remote PBS cluster that can be acce "source_list": ["/home/user1234/vasp.env"] } } -], ``` VASP code is used for fp task and mpi is used for parallel computing, so "mpirun -n 32" is added to specify the number of parallel threads. diff --git a/dpgen/remote/decide_machine.py b/dpgen/remote/decide_machine.py index 31691f322..c551be44b 100644 --- a/dpgen/remote/decide_machine.py +++ b/dpgen/remote/decide_machine.py @@ -36,11 +36,18 @@ def convert_mdata(mdata, task_types=["train", "model_devi", "fp"]): ''' for task_type in task_types: if task_type in mdata: - for key, item in mdata[task_type][0].items(): + if isinstance(mdata[task_type], dict): + task_data = mdata[task_type] + elif isinstance(mdata[task_type], (list, tuple)): + task_data = mdata[task_type][0] + else: + raise TypeError("mdata/%s should be dict or list!" 
% task_type) + for key, item in task_data.items(): if "comments" not in key: mdata[task_type + "_" + key] = item - group_size = mdata[task_type][0]["resources"].get("group_size", 1) - if group_size == 1: group_size = mdata[task_type][0].get("group_size", 1) + group_size = task_data["resources"].get("group_size", 1) + if group_size == 1: + group_size = task_data.get("group_size", 1) mdata[task_type + "_" + "group_size"] = group_size return mdata diff --git a/examples/machine/DeePMD-kit-1.x/machine-ali.json b/examples/machine/DeePMD-kit-1.x/machine-ali.json index a2a338af4..e78fc9dd4 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-ali.json +++ b/examples/machine/DeePMD-kit-1.x/machine-ali.json @@ -1,5 +1,5 @@ { - "train": [ + "train": { "machine": { "batch": "shell", @@ -34,10 +34,9 @@ }, "command": "/root/deepmd-kit/bin/dp", "group_size": 2 - } - ], + }, - "model_devi": [ + "model_devi": { "machine": { "batch": "shell", @@ -71,10 +70,9 @@ }, "command": "/root/deepmd-kit/bin/lmp", "group_size": 2 - } - ], + }, - "fp": [ + "fp": { "machine": { "batch": "shell", @@ -108,7 +106,6 @@ "command": "mpirun -n 16 /root/deepmd-pkg/vasp.5.4.4/bin/vasp_std", "group_size": 1 } - ] } diff --git a/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json b/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json index b56d022ec..4fb5845ee 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json +++ b/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json @@ -1,5 +1,5 @@ { - "train": [ + "train": { "machine": { "machine_type": "slurm", @@ -25,9 +25,8 @@ "submit_wait_time": 60 }, "python_path": "/share/apps/deepmd/compress/bin/python3.8" - } - ], - "model_devi": [ + }, + "model_devi": { "machine": { "machine_type": "slurm", @@ -54,9 +53,8 @@ }, "command": "lmp_mpi", "group_size": 5 - } - ], - "fp": [ + }, + "fp": { "machine": { "machine_type": "lsf", @@ -87,5 +85,4 @@ "command": "cp2k.popt -i input.inp", "group_size": 50 } - ] } diff --git a/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json b/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json index 22a3fdbbd..5f15303d6 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json +++ b/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json @@ -1,7 +1,7 @@ { "_comment" : "This is an example of DP-GEN on Slurm", "_comment" : "Last updated on 2021.4.30 for DP-GEN 0.9.2 by Yuzhi Zhang", - "train" :[ + "train" : { "_comment" : "Specify the installed path of DeePMD-kit", "_comment" : "The version of DeePMD-kit should be 1.*", @@ -49,10 +49,9 @@ "time_limit": "23:0:0", "_comment": "that's all" } - } - ], + }, - "model_devi": [ + "model_devi": { "machine": { "machine_type": "slurm", @@ -81,10 +80,8 @@ "command": "lmp_serial", "_comment" : "DP-GEN will put 5 tasks together in one submitting script.", "group_size": 5 - } - ], + }, "fp": - [ { "machine": { "machine_type": "slurm", @@ -113,5 +110,4 @@ "command": "mpirun -n 8 /gpfs/share/home/1600017784/yuzhi/soft/QE-mpi/PW/src/pw.x < input", "group_size": 1 } - ] } diff --git a/examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json b/examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json index 6b9ead467..0ecba4fa6 100644 --- a/examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json +++ b/examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json @@ -1,7 +1,7 @@ { "api_version": "1.0", "deepmd_version": "2.0.1", - "train" :[ + "train" : { "command": "dp", "machine": { @@ -34,9 +34,9 @@ "queue_name": "GPU", "group_size": 1 } - }], + }, "model_devi": - [{ + { "command": "lmp -i 
input.lammps -v restart 0", "machine": { "batch_type": "DpCloudServer", @@ -68,9 +68,9 @@ "queue_name": "GPU", "group_size": 5 } - }], + }, "fp": - [{ + { "command": "mpirun -n 16 vasp_std", "machine": { "batch_type": "DpCloudServer", @@ -104,5 +104,4 @@ "source_list": ["/opt/intel/oneapi/setvars.sh"] } } - ] } diff --git a/tests/tools/machine_fp_single2.json b/tests/tools/machine_fp_single2.json new file mode 100644 index 000000000..8c2212927 --- /dev/null +++ b/tests/tools/machine_fp_single2.json @@ -0,0 +1,14 @@ +{ + "fp": + { + "command": "vasp_std", + "machine":{ + "batch_type": "PBS" + }, + "resources": { + "group_size" : 8 + }, + "_comments" : "In user_forward_files, define input files to be uploaded.", + "user_forward_files" : ["vdw_kernel.bindat"] + } +} \ No newline at end of file diff --git a/tests/tools/test_convert_mdata.py b/tests/tools/test_convert_mdata.py index 5458b0faa..5dc1b944e 100644 --- a/tests/tools/test_convert_mdata.py +++ b/tests/tools/test_convert_mdata.py @@ -6,12 +6,18 @@ __package__ = 'tools' from dpgen.remote.decide_machine import convert_mdata from .context import setUpModule -machine_file = 'machine_fp_single.json' + class TestConvertMdata(unittest.TestCase): + machine_file = 'machine_fp_single.json' + def test_convert_mdata (self): - mdata = json.load(open(machine_file)) + mdata = json.load(open(self.machine_file)) mdata = convert_mdata(mdata, ["fp"]) self.assertEqual(mdata["fp_command"], "vasp_std") self.assertEqual(mdata["fp_group_size"], 8) self.assertEqual(mdata["fp_machine"]["batch_type"], "PBS") self.assertEqual(mdata["fp_user_forward_files"], ["vdw_kernel.bindat"]) + + +class TestConvertMdata2(TestConvertMdata): + machine_file = 'machine_fp_single2.json' From a0c7333d76142c51310c68ec52988f9f28cc5bab Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 9 Jun 2022 00:53:45 -0400 Subject: [PATCH 14/26] upgrade all tasks to dpdispatcher (#749) * upgrade all tasks to dpdispatcher This commit upgrades init_reaction and init_surf to use dpdispatcher * fix method args * fix typo * change the variable name from `work_dir` to `work_path` --- dpgen/data/reaction.py | 38 ++++++++-------- dpgen/data/surf.py | 11 ++--- dpgen/dispatcher/Dispatcher.py | 79 ++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 22 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index b9574d525..0abfeb965 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -7,7 +7,7 @@ output: data """ -import argparse +import warnings import glob import json import os @@ -15,7 +15,8 @@ import dpdata from dpgen import dlog -from dpgen.dispatcher.Dispatcher import make_dispatcher +from dpgen.dispatcher.Dispatcher import make_submission_compat +from dpgen.remote.decide_machine import convert_mdata from dpgen.generator.run import create_path, make_fp_task_name from dpgen.util import sepline @@ -73,14 +74,15 @@ def make_lmp(jdata): return lmp_string -def run_reaxff(jdata, mdata, dispatcher, log_file="reaxff_log"): +def run_reaxff(jdata, mdata, log_file="reaxff_log"): work_path = reaxff_path reaxff_command = "{} -in {}".format(mdata["reaxff_command"], lmp_path) run_tasks = glob.glob(os.path.join(work_path, 'task.*')) run_tasks.sort() run_tasks = [os.path.basename(ii) for ii in run_tasks] - dispatcher.run_jobs(mdata['reaxff_resources'], + make_submission_compat(mdata['reaxff_machine'], + mdata['reaxff_resources'], [reaxff_command], work_path, run_tasks, @@ -89,7 +91,8 @@ def run_reaxff(jdata, mdata, dispatcher, log_file="reaxff_log"): [ff_path, 
data_init_path, control_path, lmp_path], [trj_path], outlog=log_file, - errlog=log_file) + errlog=log_file, + api_version=mdata.get("api_version", "0.9")) def link_trj(jdata): @@ -102,7 +105,7 @@ def link_trj(jdata): os.path.join(task_path, trj_path))) -def run_build_dataset(jdata, mdata, dispatcher, log_file="build_log"): +def run_build_dataset(jdata, mdata, log_file="build_log"): work_path = build_path build_command = "{cmd} -n {dataset_name} -a {type_map} -d {lammpstrj} -c {cutoff} -s {dataset_size} -k \"{qmkeywords}\" --nprocjob {nprocjob} --nproc {nproc}".format( cmd=mdata["build_command"], @@ -119,7 +122,8 @@ def run_build_dataset(jdata, mdata, dispatcher, log_file="build_log"): run_tasks.sort() run_tasks = [os.path.basename(ii) for ii in run_tasks] - dispatcher.run_jobs(mdata['build_resources'], + make_submission_compat(mdata['build_machine'], + mdata['build_resources'], [build_command], work_path, run_tasks, @@ -128,7 +132,8 @@ def run_build_dataset(jdata, mdata, dispatcher, log_file="build_log"): [trj_path], [f"dataset_{dataset_name}_gjf"], outlog=log_file, - errlog=log_file) + errlog=log_file, + api_version=mdata.get("api_version", "0.9")) def link_fp_input(): @@ -146,7 +151,6 @@ def link_fp_input(): def run_fp(jdata, mdata, - dispatcher, log_file="output", forward_common_files=[]): fp_command = mdata['fp_command'] @@ -162,7 +166,8 @@ def run_fp(jdata, run_tasks = [os.path.basename(ii) for ii in fp_run_tasks] - dispatcher.run_jobs(mdata['fp_resources'], + make_submission_compat(mdata['fp_machine'], + mdata['fp_resources'], [fp_command], work_path, run_tasks, @@ -171,7 +176,8 @@ def run_fp(jdata, ["input"], [log_file], outlog=log_file, - errlog=log_file) + errlog=log_file, + api_version=mdata.get("api_version", "0.9")) def convert_data(jdata): @@ -198,6 +204,7 @@ def gen_init_reaction(args): with open(args.MACHINE, "r") as fp: mdata = json.load(fp) + mdata = convert_mdata(mdata, ["reaxff", "build", "fp"]) record = "record.reaction" iter_rec = -1 numb_task = 7 @@ -213,18 +220,15 @@ def gen_init_reaction(args): elif ii == 0: link_reaxff(jdata) elif ii == 1: - dispatcher = make_dispatcher(mdata["reaxff_machine"]) - run_reaxff(jdata, mdata, dispatcher) + run_reaxff(jdata, mdata) elif ii == 2: link_trj(jdata) elif ii == 3: - dispatcher = make_dispatcher(mdata["build_machine"]) - run_build_dataset(jdata, mdata, dispatcher) + run_build_dataset(jdata, mdata) elif ii == 4: link_fp_input() elif ii == 5: - dispatcher = make_dispatcher(mdata["fp_machine"]) - run_fp(jdata, mdata, dispatcher) + run_fp(jdata, mdata) elif ii == 6: convert_data(jdata) with open(record, "a") as frec: diff --git a/dpgen/data/surf.py b/dpgen/data/surf.py index bc31b6705..543f02bc8 100644 --- a/dpgen/data/surf.py +++ b/dpgen/data/surf.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import time +import warnings import os,json,shutil,re,glob,argparse import numpy as np import subprocess as sp @@ -12,7 +12,7 @@ from dpgen import dlog from dpgen import ROOT_PATH from dpgen.remote.decide_machine import convert_mdata -from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher +from dpgen.dispatcher.Dispatcher import make_submission_compat #-----PMG--------- from pymatgen.io.vasp import Poscar from pymatgen.core import Structure, Element @@ -565,15 +565,16 @@ def run_vasp_relax(jdata, mdata): run_tasks = [ii.replace(work_dir+"/", "") for ii in relax_run_tasks] #dlog.info(run_tasks) - dispatcher = make_dispatcher(mdata['fp_machine'], mdata['fp_resources'], work_dir, run_tasks, fp_group_size) - 
dispatcher.run_jobs(fp_resources,
+    make_submission_compat(mdata['fp_machine'],
+                           fp_resources,
                            [fp_command],
                            work_dir,
                            run_tasks,
                            fp_group_size,
                            forward_common_files,
                            forward_files,
-                           backward_files)
+                           backward_files,
+                           api_version=mdata.get("api_version", "0.9"))
 
 def gen_init_surf(args):
     try:
diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py
index 29bea5669..abbe493b8 100644
--- a/dpgen/dispatcher/Dispatcher.py
+++ b/dpgen/dispatcher/Dispatcher.py
@@ -1,5 +1,6 @@
 from distutils.version import LooseVersion
 import os,sys,time,random,json,glob
+import warnings
 from typing import List
 from dpdispatcher import Task, Submission, Resources, Machine
 from dpgen.dispatcher.LocalContext import LocalSession
@@ -406,3 +407,81 @@ def mdata_arginfo() -> List[Argument]:
     return [
         command_arginfo, machine_arginfo, resources_arginfo,
     ]
+
+
+def make_submission_compat(
+    machine: dict,
+    resources: dict,
+    commands: List[str],
+    work_path: str,
+    run_tasks: List[str],
+    group_size: int,
+    forward_common_files: List[str],
+    forward_files: List[str],
+    backward_files: List[str],
+    outlog: str="log",
+    errlog: str="err",
+    api_version: str="0.9",
+    ) -> None:
+    """Make submission with compatibility of both dispatcher API v0 and v1.
+
+    If `api_version` is less than 1.0, use `make_dispatcher`. If
+    `api_version` is 1.0 or above, use `make_submission`.
+
+    Parameters
+    ----------
+    machine : dict
+        machine dict
+    resources : dict
+        resource dict
+    commands : list[str]
+        list of commands
+    work_path : str
+        working directory
+    run_tasks : list[str]
+        list of paths to running tasks
+    group_size : int
+        group size
+    forward_common_files : list[str]
+        forwarded common files shared for all tasks
+    forward_files : list[str]
+        forwarded files for each task
+    backward_files : list[str]
+        files to be downloaded back for each task
+    outlog : str, default="log"
+        path to log from stdout
+    errlog : str, default="err"
+        path to log from stderr
+    api_version : str, default="0.9"
+        API version. 1.0 is recommended
+    """
+    if LooseVersion(api_version) < LooseVersion('1.0'):
+        warnings.warn("the dpdispatcher will be updated to a new version "
+            "and the interface may be changed. Please check the documents for more details")
+        dispatcher = make_dispatcher(machine, resources, work_path, run_tasks, group_size)
+        dispatcher.run_jobs(resources,
+                            commands,
+                            work_path,
+                            run_tasks,
+                            group_size,
+                            forward_common_files,
+                            forward_files,
+                            backward_files,
+                            outlog=outlog,
+                            errlog=errlog)
+
+    elif LooseVersion(api_version) >= LooseVersion('1.0'):
+        submission = make_submission(
+            machine,
+            resources,
+            commands=commands,
+            work_path=work_path,
+            run_tasks=run_tasks,
+            group_size=group_size,
+            forward_common_files=forward_common_files,
+            forward_files=forward_files,
+            backward_files=backward_files,
+            outlog=outlog,
+            errlog=errlog)
+        submission.run_submission()
+
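
For reference, a minimal sketch of how the `make_submission_compat` wrapper added above is meant to be driven (mirroring the updated call sites; `mdata` is assumed to be the dict loaded from machine.json, and all paths and file names below are illustrative placeholders, not part of the patch):

```python
# a sketch, not part of the patch: submit one group of fp tasks through the
# compatibility wrapper, which picks the old or new dpdispatcher API based
# on mdata["api_version"]
import json

from dpgen.dispatcher.Dispatcher import make_submission_compat
from dpgen.remote.decide_machine import convert_mdata

# load machine settings and flatten them into fp_machine/fp_resources keys
with open("machine.json") as f:          # path is illustrative
    mdata = convert_mdata(json.load(f), ["fp"])

make_submission_compat(
    mdata["fp_machine"],                 # machine dict
    mdata["fp_resources"],               # resources dict
    ["mpirun -n 8 vasp_std"],            # commands (illustrative)
    "iter.000000/02.fp",                 # work_path (illustrative)
    ["task.000.000000"],                 # run_tasks, relative to work_path
    1,                                   # group_size
    [],                                  # forward_common_files
    ["POSCAR", "INCAR", "POTCAR"],       # forward_files (illustrative)
    ["OUTCAR"],                          # backward_files (illustrative)
    outlog="fp.log",
    errlog="fp.log",
    api_version=mdata.get("api_version", "0.9"),
)
```
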
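
As a usage sketch of the new option (only `vol_abs` itself comes from this patch; every other value below is illustrative), the "eos" item of property.json would look like:

```json
{
    "type": "eos",
    "vol_start": 10,
    "vol_end": 30,
    "vol_step": 0.5,
    "vol_abs": true
}
```

With `"vol_abs": true`, `vol_start` and `vol_end` are read as absolute volumes; without it, they are ratios of the equilibrium volume.
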
From 7d986d9b2c5fae9ae49c8a741d87f502bf3daa11 Mon Sep 17 00:00:00 2001
From: Liu Renxi <75369672+Liu-RX@users.noreply.github.com>
Date: Tue, 14 Jun 2022 14:05:50 +0800
Subject: [PATCH 16/26] fix a bug in make_abacus_scf_input (#754)

Co-authored-by: LiuRenxi
---
 README.md                         | 2 +-
 dpgen/generator/lib/abacus_scf.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index fb280e226..f26fb51df 100644
--- a/README.md
+++ b/README.md
@@ -570,7 +570,7 @@ The bold notation of key (such aas **type_map**) means that it's a necessary key
 | **user_fp_params** | Dict | |Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input.
 | **external_input_path** | String | | Conflict with key:user_fp_params, use the template input provided by user, some rules should be followed, read the following text in detail.
 | *fp_style == ABACUS*
-| **user_fp_params** | Dict | |Parameters for ABACUS INPUT. find detail [Here](https://github.com/deepmodeling/abacus-develop/blob/develop/docs/input-main.md#out-descriptor). If `deepks_model` is set, the model file should be in the pseudopotential directory.
+| **user_fp_params** | Dict | |Parameters for ABACUS INPUT. find detail [Here](https://github.com/deepmodeling/abacus-develop/blob/develop/docs/input-main.md#out-descriptor). If `deepks_model` is set, the model file should be in the pseudopotential directory. You can also set the `KPT` file by adding a `k_points` key, corresponding to a list of six integers, to this dictionary.
 | **fp_orb_files** | List | |List of atomic orbital files. The files should be in pseudopotential directory.
 | **fp_dpks_descriptor** | String | |DeePKS descriptor file name. The file should be in pseudopotential directory.
 
diff --git a/dpgen/generator/lib/abacus_scf.py b/dpgen/generator/lib/abacus_scf.py
index 256eb1d9d..1a9882979 100644
--- a/dpgen/generator/lib/abacus_scf.py
+++ b/dpgen/generator/lib/abacus_scf.py
@@ -83,7 +83,10 @@ def make_abacus_scf_input(fp_params):
             ret += "deepks_scf %d\n" % fp_params["deepks_scf"]
         elif key == "deepks_model":
             ret += "deepks_model %s\n" % fp_params["deepks_model"]
-        else:
+        elif key != "k_points":  # the "k_points" key is used to generate the KPT file
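+            # illustrative: with user_fp_params = {"ecutwfc": 80, "k_points": [2, 2, 2, 0, 0, 0]},
+            # "ecutwfc" is written into INPUT below, while "k_points" is consumed
+            # separately to generate the KPT file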
ret += "%s %s\n" % (key, str(fp_params[key])) return ret From ec1600592dfe9c8883dfe998818bf26e1930e9de Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 14 Jun 2022 02:17:46 -0400 Subject: [PATCH 17/26] init_reaction: fix compatibility with new dpdispatcher (#755) fix compatibility as the key was changed in the dpdispatcher --- dpgen/data/reaction.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index 0abfeb965..5e900f9de 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -107,6 +107,9 @@ def link_trj(jdata): def run_build_dataset(jdata, mdata, log_file="build_log"): work_path = build_path + # compatible with new dpdispatcher and old dpgen.dispatcher + build_ntasks = mdata["build_resources"].get("cpu_per_node", mdata["build_resources"]["task_per_node"]) + fp_ntasks = mdata["fp_resources"].get("cpu_per_node", mdata["fp_resources"]["task_per_node"]) build_command = "{cmd} -n {dataset_name} -a {type_map} -d {lammpstrj} -c {cutoff} -s {dataset_size} -k \"{qmkeywords}\" --nprocjob {nprocjob} --nproc {nproc}".format( cmd=mdata["build_command"], type_map=" ".join(jdata["type_map"]), @@ -114,8 +117,8 @@ def run_build_dataset(jdata, mdata, log_file="build_log"): cutoff=jdata["cutoff"], dataset_size=jdata["dataset_size"], qmkeywords=jdata["qmkeywords"], - nprocjob=mdata["fp_resources"]["task_per_node"], - nproc=mdata["build_resources"]["task_per_node"], + nprocjob=fp_ntasks, + nproc=build_ntasks, dataset_name=dataset_name ) run_tasks = glob.glob(os.path.join(work_path, 'task.*')) From d43fb5266b636cea521d00208cdba912dc517de2 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 19 Jun 2022 12:09:51 -0400 Subject: [PATCH 18/26] generate machine parameter docs for simplify and init (#751) * generate machine parameter page for simplify and init * switching to new dargs directive --- doc/conf.py | 1 + doc/index.rst | 5 +++++ doc/init/init-bulk-mdata.rst | 6 ++++++ doc/init/init-reaction-mdata.rst | 6 ++++++ doc/init/init-surf-mdata.rst | 6 ++++++ doc/simplify/simplify-mdata.rst | 6 ++++++ dpgen/arginfo.py | 35 +++++++++++++++++++++++++++++++ dpgen/data/arginfo.py | 36 ++++++++++++++++++++++++++++++++ dpgen/generator/arginfo.py | 16 ++------------ dpgen/simplify/arginfo.py | 13 ++++++++++++ 10 files changed, 116 insertions(+), 14 deletions(-) create mode 100644 doc/init/init-bulk-mdata.rst create mode 100644 doc/init/init-reaction-mdata.rst create mode 100644 doc/init/init-surf-mdata.rst create mode 100644 doc/simplify/simplify-mdata.rst create mode 100644 dpgen/arginfo.py create mode 100644 dpgen/data/arginfo.py create mode 100644 dpgen/simplify/arginfo.py diff --git a/doc/conf.py b/doc/conf.py index 9d5ecc006..99dce21b5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -40,6 +40,7 @@ extensions = [ 'deepmodeling_sphinx', + 'dargs.sphinx', "sphinx_rtd_theme", 'myst_parser', 'sphinx.ext.autosummary', diff --git a/doc/index.rst b/doc/index.rst index 341ce5d79..6eea4d95b 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -32,6 +32,9 @@ DPGEN's documentation :maxdepth: 2 :caption: Init + init/init-bulk-mdata + init/init-surf-mdata + init/init-reaction-mdata .. _autotest:: @@ -46,6 +49,8 @@ DPGEN's documentation :maxdepth: 2 :caption: Simplify + simplify/simplify-mdata + .. 
_tutorial: diff --git a/doc/init/init-bulk-mdata.rst b/doc/init/init-bulk-mdata.rst new file mode 100644 index 000000000..b3098e906 --- /dev/null +++ b/doc/init/init-bulk-mdata.rst @@ -0,0 +1,6 @@ +dpgen init_bulk machine parameters +================================== + +.. dargs:: + :module: dpgen.data.arginfo + :func: init_bulk_mdata_arginfo diff --git a/doc/init/init-reaction-mdata.rst b/doc/init/init-reaction-mdata.rst new file mode 100644 index 000000000..2fe35a0d8 --- /dev/null +++ b/doc/init/init-reaction-mdata.rst @@ -0,0 +1,6 @@ +dpgen init_reaction machine parameters +====================================== + +.. dargs:: + :module: dpgen.data.arginfo + :func: init_reaction_mdata_arginfo diff --git a/doc/init/init-surf-mdata.rst b/doc/init/init-surf-mdata.rst new file mode 100644 index 000000000..35e8e322f --- /dev/null +++ b/doc/init/init-surf-mdata.rst @@ -0,0 +1,6 @@ +dpgen init_surf machine parameters +================================== + +.. dargs:: + :module: dpgen.data.arginfo + :func: init_surf_mdata_arginfo diff --git a/doc/simplify/simplify-mdata.rst b/doc/simplify/simplify-mdata.rst new file mode 100644 index 000000000..995fc90f8 --- /dev/null +++ b/doc/simplify/simplify-mdata.rst @@ -0,0 +1,6 @@ +dpgen simplify machine parameters +================================= + +.. dargs:: + :module: dpgen.simplify.arginfo + :func: simplify_mdata_arginfo diff --git a/dpgen/arginfo.py b/dpgen/arginfo.py new file mode 100644 index 000000000..3f657942a --- /dev/null +++ b/dpgen/arginfo.py @@ -0,0 +1,35 @@ +from typing import Tuple + +from dargs import Argument + +from dpgen.dispatcher.Dispatcher import mdata_arginfo + + +def general_mdata_arginfo(name: str, tasks: Tuple[str]) -> Argument: + """Generate arginfo for general mdata. + + Parameters + ---------- + name : str + mdata name + tasks : tuple[str] + tuple of task keys, e.g. ("train", "model_devi", "fp") + + Returns + ------- + Argument + arginfo + """ + + doc_api_version = "Please set to 1.0" + doc_run_mdata = "machine.json file" + arg_api_version = Argument("api_version", str, optional=False, doc=doc_api_version) + + sub_fields = [arg_api_version] + doc_mdata = "Parameters of command, machine, and resources for %s" + for task in tasks: + sub_fields.append(Argument( + task, dict, optional=False, sub_fields=mdata_arginfo(), + doc=doc_mdata % task, + )) + return Argument(name, dict, sub_fields=sub_fields, doc=doc_run_mdata) diff --git a/dpgen/data/arginfo.py b/dpgen/data/arginfo.py new file mode 100644 index 000000000..d5814c036 --- /dev/null +++ b/dpgen/data/arginfo.py @@ -0,0 +1,36 @@ +from dargs import Argument + +from dpgen.arginfo import general_mdata_arginfo + + +def init_bulk_mdata_arginfo() -> Argument: + """Generate arginfo for dpgen init_bulk mdata. + + Returns + ------- + Argument + arginfo + """ + return general_mdata_arginfo("init_bulk_mdata", ("fp",)) + + +def init_surf_mdata_arginfo() -> Argument: + """Generate arginfo for dpgen init_surf mdata. + + Returns + ------- + Argument + arginfo + """ + return general_mdata_arginfo("init_surf_mdata", ("fp",)) + + +def init_reaction_mdata_arginfo() -> Argument: + """Generate arginfo for dpgen init_reaction mdata. 
+ + Returns + ------- + Argument + arginfo + """ + return general_mdata_arginfo("init_reaction_mdata", ("reaxff", "build", "fp")) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index f8815862d..cb2fb887b 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -1,6 +1,6 @@ from dargs import Argument -from dpgen.dispatcher.Dispatcher import mdata_arginfo +from dpgen.arginfo import general_mdata_arginfo def run_mdata_arginfo() -> Argument: """Generate arginfo for dpgen run mdata. @@ -10,16 +10,4 @@ def run_mdata_arginfo() -> Argument: Argument arginfo """ - - doc_api_version = "Please set to 1.0" - doc_run_mdata = "machine.json file" - arg_api_version = Argument("api_version", str, optional=False, doc=doc_api_version) - - sub_fields = [arg_api_version] - doc_mdata = "Parameters of command, machine, and resources for %s" - for task in ("train", "model_devi", "fp"): - sub_fields.append(Argument( - task, dict, optional=False, sub_fields=mdata_arginfo(), - doc=doc_mdata % task, - )) - return Argument("run_mdata", dict, sub_fields=sub_fields, doc=doc_run_mdata) + return general_mdata_arginfo("run_mdata", ("train", "model_devi", "fp")) diff --git a/dpgen/simplify/arginfo.py b/dpgen/simplify/arginfo.py new file mode 100644 index 000000000..0fbfe606e --- /dev/null +++ b/dpgen/simplify/arginfo.py @@ -0,0 +1,13 @@ +from dargs import Argument + +from dpgen.arginfo import general_mdata_arginfo + +def simplify_mdata_arginfo() -> Argument: + """Generate arginfo for dpgen simplify mdata. + + Returns + ------- + Argument + arginfo + """ + return general_mdata_arginfo("simplify_mdata", ("train", "model_devi", "fp")) From 4a5557e60dff84b6ff2919dd6df32286e9077474 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 20 Jun 2022 23:17:34 -0400 Subject: [PATCH 19/26] add auto cli docs (#759) * add auto cli docs * fix typo * fix package name... * forgot to return parser * add the blank line --- doc/conf.py | 1 + doc/index.rst | 2 ++ doc/overview/cli.rst | 7 +++++++ doc/requirements.txt | 1 + dpgen/main.py | 16 +++++++++++++--- 5 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 doc/overview/cli.rst diff --git a/doc/conf.py b/doc/conf.py index 99dce21b5..97b4b206b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -45,6 +45,7 @@ 'myst_parser', 'sphinx.ext.autosummary', 'sphinx.ext.viewcode', + 'sphinxarg.ext', ] diff --git a/doc/index.rst b/doc/index.rst index 6eea4d95b..eaa229813 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -7,6 +7,8 @@ DPGEN's documentation .. toctree:: :maxdepth: 2 :caption: Overview + + overview/cli .. _installation:: diff --git a/doc/overview/cli.rst b/doc/overview/cli.rst new file mode 100644 index 000000000..e57f1b064 --- /dev/null +++ b/doc/overview/cli.rst @@ -0,0 +1,7 @@ +Command line interface +====================== + +.. argparse:: + :module: dpgen.main + :func: main_parser + :prog: dpgen diff --git a/doc/requirements.txt b/doc/requirements.txt index 33ad28e39..0ae5c76f1 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -2,6 +2,7 @@ sphinx>=4.0.2 recommonmark sphinx_rtd_theme sphinx_markdown_tables +sphinx-argparse myst-parser deepmodeling_sphinx . diff --git a/dpgen/main.py b/dpgen/main.py index 6dcdc4ccd..c93c41ef4 100644 --- a/dpgen/main.py +++ b/dpgen/main.py @@ -30,9 +30,14 @@ __email__ = "" -def main(): - info() - print("Description\n------------") +def main_parser() -> argparse.ArgumentParser: + """Returns parser for `dpgen` command. 
+ + Returns + ------- + argparse.ArgumentParser + parser for `dpgen` command + """ parser = argparse.ArgumentParser(description=""" dpgen is a convenient script that uses DeepGenerator to prepare initial data, drive DeepMDkit and analyze results. This script works based on @@ -156,8 +161,13 @@ def main(): help="parameter file, json format") parser_db.set_defaults(func=db_run) + return parser +def main(): + info() + print("Description\n------------") + parser = main_parser() try: import argcomplete argcomplete.autocomplete(parser) From 5ed5fa1529bc87f412a141349d9df4a95ff037a8 Mon Sep 17 00:00:00 2001 From: HuangJiameng <105633685+HuangJiameng@users.noreply.github.com> Date: Wed, 29 Jun 2022 10:01:41 +0800 Subject: [PATCH 20/26] correct the wrong spelling of 'failure' (#764) --- dpgen/dispatcher/DispatcherList.py | 6 +++--- examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dpgen/dispatcher/DispatcherList.py b/dpgen/dispatcher/DispatcherList.py index 085ae5d9a..22b77fd50 100644 --- a/dpgen/dispatcher/DispatcherList.py +++ b/dpgen/dispatcher/DispatcherList.py @@ -45,7 +45,7 @@ def run_jobs(self, mark_failure = False, outlog = 'log', errlog = 'err'): - ratio_failure = self.mdata_resources.get("ratio_failue", 0) + ratio_failure = self.mdata_resources.get("ratio_failure", 0) while True: if self.check_all_dispatchers_finished(ratio_failure): self.clean() @@ -188,7 +188,7 @@ def make_dispatcher(self, ii): # Base - def check_dispatcher_status(self, ii, allow_failue=False): + def check_dispatcher_status(self, ii, allow_failure=False): '''catch running dispatcher exception if no exception occured, check finished''' if self.dispatcher_list[ii]["dispatcher_status"] == "running": @@ -198,7 +198,7 @@ def check_dispatcher_status(self, ii, allow_failue=False): clean = self.mdata_resources.get("clean", False) try: # avoid raising ssh exception in download proceess - finished = self.dispatcher_list[ii]["dispatcher"].all_finished(self.dispatcher_list[ii]["entity"].job_handler, allow_failue, clean) + finished = self.dispatcher_list[ii]["dispatcher"].all_finished(self.dispatcher_list[ii]["entity"].job_handler, allow_failure, clean) if finished: self.dispatcher_list[ii]["dispatcher_status"] = "finished" except Exception: diff --git a/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json b/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json index 3de59661f..e2db8d254 100644 --- a/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json +++ b/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json @@ -64,7 +64,7 @@ }, "resources": { "allow_failure": true, - "ratio_failue": 0.05, + "ratio_failure": 0.05, "task_per_node": 16, "with_mpi": true, "_comment" : "Load the intel compiler.", From 3dec4b87b14233b39d5ea6503c286177c1ae8599 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 1 Jul 2022 01:22:16 -0400 Subject: [PATCH 21/26] upgrade machine examples to new dpdispatcher (#762) --- .../DeePMD-kit-1.0/machine-local-4GPU.json | 165 +++++++------ .../machine/DeePMD-kit-1.x/machine-local.json | 103 ++++---- .../machine-lsf-slurm-cp2k.json | 178 +++++++------- .../DeePMD-kit-1.x/machine-pbs-gaussian.json | 163 +++++++------ .../DeePMD-kit-1.x/machine-slurm-qe.json | 223 +++++++++--------- 5 files changed, 442 insertions(+), 390 deletions(-) diff --git a/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json b/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json index e2db8d254..e0e6bfca0 100644 --- 
a/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json +++ b/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json @@ -1,79 +1,90 @@ { - "_comment" : "This is an example of DP-GEN on Local device running with 4 GPUs", - "_comment" : "Last updated on 2021.5.9 for DP-GEN 0.9.2 by Ke XU", - "train": [ - { - "_comment" : "Specify the installed path of DeePMD-kit", - "_comment" : "The version of DeePMD-kit should be 1.*", - "command": "/home/user/anaconda3/bin/dp", - "_comment" : "Specify machine settings", - "machine": { - "_comment" : "Supported batches include slurm, pbs, shell, lsf.", - "batch": "shell", - "work_path": "/tmp/dpwork", - "_comment": "that's all" - }, - "resources":{ - "_comment" : "The number of nodes.", - "numb_node": 1, - "_comment" : "If you choose to run with multiple GPUs simultaneously, just ignore numb_gpu.", - "numb_gpu": 0, - "_comment" : "The number of CPUs.", - "task_per_node": 4, - "_comment" : "The number of GPUs that can be used for each task.", - "manual_cuda_devices": 4, - "_comment" : "The number of tasks that can be run in each GPU.", - "manual_cuda_multiplicity":1, - "_comment" : "Allow the multi-GPU task running.", - "cuda_multi_task": true, - "module_list": [], - "_comment" : "Environment to be activated. This will generate source xxx/psxevars.sh in scripts. ", - "source_list": ["/opt/intel/parallel_studio_xe_2020/psxevars.sh"] - }, - "_comment" : "DP-GEN will put 4 tasks together in one submitting script.", - "group_size": 4 - } - ], - - "model_devi": [ - { - "machine": { - "batch": "shell", - "work_path": "/tmp/dpwork" - }, - "resources": { - "numb_node": 1, - "numb_gpu": 0, - "task_per_node": 4, - "manual_cuda_devices": 4, - "manual_cuda_multiplicity":1, - "cuda_multi_task": true, - "source_list": [], - "module_list": [] - }, - "command": "/home/user/Soft/Deepmd/lammps-stable_29Oct2020/src/lmp_mpi", - "group_size": 4 - } - ], - - "fp": [ - { - "machine": { - "batch": "shell", - "work_path": "/tmp/dpwork" - }, - "resources": { - "allow_failure": true, - "ratio_failure": 0.05, - "task_per_node": 16, - "with_mpi": true, - "_comment" : "Load the intel compiler.", - "source_list": ["/opt/intel/parallel_studio_xe_2020/psxevars.sh"], - "envs": {"PATH" : "/home/user/Soft/VASP/vasp.5.4.4-allbak/bin:$PATH"}, - "_comment" : "This will generate export PATH=/home/user/Soft/VASP/vasp.5.4.4-allbak/bin:$PATH in scripts;" - }, - "command": "vasp_std", - "group_size": 1 - } - ] + "_comment" : "This is an example of DP-GEN on Local device running with 4 GPUs", + "_comment": "Last updated on 2021.5.9 for DP-GEN 0.9.2 by Ke XU", + "train": { + "_comment" : "Specify the installed path of DeePMD-kit", + "command": "/home/user/anaconda3/bin/dp", + "_comment" : "Specify machine settings", + "machine": { + "_comment": "Supported batches include slurm, pbs, shell, lsf.", + "batch_type": "shell", + "context_type": "local", + "remote_root": "/tmp/dpwork", + "local_root": "./" + }, + "resources": { + "module_list": [], + "_comment": "Environment to be activated. This will generate source xxx/psxevars.sh in scripts. 
", + "source_list": [ + "/opt/intel/parallel_studio_xe_2020/psxevars.sh" + ], + "batch_type": "shell", + "_comment": "DP-GEN will put 4 tasks together in one submitting script.", + "group_size": 4, + "_comment" : "The number of nodes.", + "number_node": 1, + "_comment" : "The number of CPUs.", + "cpu_per_node": 4, + "_comment" : "If you choose to run with multiple GPUs simultaneously, just ignore numb_gpu.", + "gpu_per_node": 0, + "kwargs": {}, + "strategy": { + "_comment" : "Allow the multi-GPU task running.", + "if_cuda_multi_devices": true + }, + "para_deg": 4, + "queue_name": "" + } + }, + "model_devi": { + "machine": { + "batch_type": "shell", + "context_type": "local", + "remote_root": "/tmp/dpwork", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [], + "batch_type": "shell", + "group_size": 4, + "number_node": 1, + "cpu_per_node": 4, + "gpu_per_node": 0, + "kwargs": {}, + "strategy": { + "if_cuda_multi_devices": true + }, + "para_deg": 4, + "queue_name": "" + }, + "command": "/home/user/Soft/Deepmd/lammps-stable_29Oct2020/src/lmp_mpi" + }, + "fp": { + "machine": { + "batch_type": "shell", + "context_type": "local", + "remote_root": "/tmp/dpwork", + "local_root": "./" + }, + "resources": { + "_comment" : "Load the intel compiler.", + "source_list": [ + "/opt/intel/parallel_studio_xe_2020/psxevars.sh" + ], + "_comment": "This will generate export PATH=/home/user/Soft/VASP/vasp.5.4.4-allbak/bin:$PATH in scripts;", + "envs": { + "PATH": "/home/user/Soft/VASP/vasp.5.4.4-allbak/bin:$PATH" + }, + "batch_type": "shell", + "group_size": 1, + "cpu_per_node": 16, + "kwargs": {}, + "queue_name": "", + "number_node": 1, + "gpu_per_node": 1 + }, + "command": "mpirun -n 16 vasp_std || :" + }, + "api_version": "1.0" } diff --git a/examples/machine/DeePMD-kit-1.x/machine-local.json b/examples/machine/DeePMD-kit-1.x/machine-local.json index a266f712b..c8134d750 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-local.json +++ b/examples/machine/DeePMD-kit-1.x/machine-local.json @@ -1,42 +1,65 @@ { - "_comment": "training on localhost ", - "_comment" : "This is for DeePMD-kit 1.*", - "train_command" : "/home/wanghan/local/deepmd/1.*/dp", - "train_machine": { - "batch": "shell", - "work_path" : "/home/wanghan/tmp/subs/" - }, - "train_resources": { - "envs": { - } - }, - - - "_comment": "model_devi on localhost ", - "model_devi_command": "/home/wanghan/local/bin/lmp_mpi_010", - "model_devi_group_size": 5, - "model_devi_machine": { - "batch": "shell", - "_comment" : "If lazy_local is true, calculations are done directly in current folders.", - "lazy_local" : true - }, - "model_devi_resources": { - }, - - "_comment": "fp on localhost ", - "fp_command": "/home/wanghan/local/bin/vasp_std", - "fp_group_size": 2, - "fp_machine": { - "batch": "shell", - "work_path" : "/home/wanghan/tmp/subs/", - "_comment" : "that's all" - }, - "fp_resources": { - "module_list": ["mpi"], - "task_per_node":4, - "with_mpi": true, - "_comment": "that's all" + "api_version": "1.0", + "train": { + "_comment": "training on localhost", + "machine": { + "batch_type": "shell", + "context_type": "local", + "remote_root": "/home/wanghan/tmp/subs/", + "local_root": "./" + }, + "resources": { + "envs": {}, + "batch_type": "shell", + "group_size": 1, + "kwargs": {}, + "queue_name": "", + "number_node": 1, + "cpu_per_node": 1, + "gpu_per_node": 1 + }, + "command": "/home/wanghan/local/deepmd/1.*/dp" }, - - "_comment": " that's all " -} + "model_devi": { + "_comment": "model devi on localhost", + "machine": 
{ + "_comment": "If lazy_local, calculations are done directly in current folders.", + "batch_type": "shell", + "context_type": "lazylocal", + "local_root": "./" + }, + "resources": { + "batch_type": "shell", + "group_size": 5, + "kwargs": {}, + "queue_name": "", + "number_node": 1, + "cpu_per_node": 1, + "gpu_per_node": 1 + }, + "command": "/home/wanghan/local/bin/lmp_mpi_010" + }, + "fp": { + "_comment": "fp on localhost", + "machine": { + "batch_type": "shell", + "context_type": "local", + "remote_root": "/home/wanghan/tmp/subs/", + "local_root": "./" + }, + "resources": { + "module_list": [ + "mpi" + ], + "_comment": "that's all", + "batch_type": "shell", + "group_size": 2, + "cpu_per_node": 4, + "kwargs": {}, + "queue_name": "", + "number_node": 1, + "gpu_per_node": 1 + }, + "command": "mpirun -n 4 /home/wanghan/local/bin/vasp_std" + } +} \ No newline at end of file diff --git a/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json b/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json index 4fb5845ee..348609c1e 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json +++ b/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json @@ -1,88 +1,98 @@ { - "train": - { - "machine": { - "machine_type": "slurm", - "hostname": "210.34.15.205", - "port": 22, - "username": "ybzhuang", - "work_path": "/home/ybzhuang/workdir" - }, - "resources": { - "numb_gpu": 1, - "numb_node": 1, - "task_per_node": 1, - "partition": "gpu", - "job_name": "train", - "qos":"emergency", - "exclude_list": [], - "source_list": [ - ], - "module_list": [ - "deepmd/1.2" - ], - "time_limit": "96:0:0", - "submit_wait_time": 60 - }, - "python_path": "/share/apps/deepmd/compress/bin/python3.8" + "train": { + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "210.34.15.205", + "port": 22, + "username": "ybzhuang" + }, + "remote_root": "/home/ybzhuang/workdir", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [ + "deepmd/1.2" + ], + "batch_type": "slurm", + "number_node": 1, + "cpu_per_node": 1, + "gpu_per_node": 1, + "queue_name": "emergency", + "custom_flags": [ + "#SBATCH -t 96:0:0" + ], + "kwargs": {}, + "wait_time": 60, + "group_size": 1 + }, + "command": "/share/apps/deepmd/compress/bin/python3.8-m deepmd" }, - "model_devi": - { - "machine": { - "machine_type": "slurm", - "hostname": "210.34.15.205", - "port": 22, - "username": "ybzhuang", - "work_path": "/home/ybzhuang/workdir" - }, - "resources": { - "numb_gpu": 1, - "numb_node": 1, - "task_per_node": 1, - "partition": "gpu", - "job_name": "md", - "qos":"emergency", - "exclude_list": [], - "source_list": [ - ], - "module_list": [ - "deepmd/1.2" - ], - "time_limit": "96:0:0", - "submit_wait_time": 60 - }, - "command": "lmp_mpi", - "group_size": 5 + "model_devi": { + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "210.34.15.205", + "port": 22, + "username": "ybzhuang" + }, + "remote_root": "/home/ybzhuang/workdir", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [ + "deepmd/1.2" + ], + "batch_type": "slurm", + "group_size": 5, + "number_node": 1, + "cpu_per_node": 1, + "gpu_per_node": 1, + "queue_name": "emergency", + "custom_flags": [ + "#SBATCH -t 96:0:0" + ], + "kwargs": {}, + "wait_time": 60 + }, + "command": "lmp_mpi" }, - "fp": - { - "machine": { - "machine_type": "lsf", - "hostname": "localhost", - "port": 6666, - "username": "ybzhuang", - "work_path": 
"/data/ybzhuang/methane-dpgen/dpgen-tutorial-2020-08-23/dpgen-tutorial-mathane/workpath" - }, - "resources": { - "cvasp": false, - "task_per_node": 32, - "numb_node": 1, - "node_cpu": 32, - "exclude_list": [], - "with_mpi": true, - "source_list": [ - ], - "module_list": [ - "intel/17.5.239", - "mpi/intel/2017.5.239", - "gcc/5.5.0", - "cp2k/7.1" + "fp": { + "machine": { + "batch_type": "lsf", + "context_type": "ssh", + "remote_profile": { + "hostname": "localhost", + "port": 6666, + "username": "ybzhuang" + }, + "remote_root": "/data/ybzhuang/methane-dpgen/dpgen-tutorial-2020-08-23/dpgen-tutorial-mathane/workpath", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [ + "intel/17.5.239", + "mpi/intel/2017.5.239", + "gcc/5.5.0", + "cp2k/7.1" + ], + "batch_type": "lsf", + "group_size": 50, + "number_node": 1, + "cpu_per_node": 32, + "queue_name": "53-medium", + "custom_flags": [ + "#BSUB -W 12:00:00" ], - "time_limit": "12:00:00", - "partition": "53-medium", - "_comment": "that's Bel" - }, - "command": "cp2k.popt -i input.inp", - "group_size": 50 - } -} + "kwargs": {}, + "gpu_per_node": 1 + }, + "command": "mpirun -n 32 cp2k.popt -i input.inp" + }, + "api_version": "1.0" +} \ No newline at end of file diff --git a/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json b/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json index 6893471c5..daa743dcc 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json +++ b/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json @@ -1,79 +1,88 @@ { - "_comment": "training on localhost ", - "train_command": "/gpfs/home/tzhu/anaconda3/envs/python3.6/bin/dp", - "train_machine": { - "machine_type": "lsf", - "hostname" : "xxx.xxx.xxx.xxx", - "port" : 22, - "username": "tzhu", - "work_path" : "/gpfs/home/tzhu/jzzeng/dpgen_workdir", - "_comment" : "that's all" - }, - "train_resources": { - "source_list": [ "activate deepmd" ], - "envs": { - "KMP_BLOCKTIME": 0, - "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" - }, - "numb_gpu": 1, - "numb_node": 1, - "node_cpu": 0, - "partition": "newgpu", - "job_name": "dpgen_jzzeng", - "with_mpi": false, - "time_limit": false, - "_comment": "that's all" - }, - - - "_comment": "model_devi on localhost ", - "model_devi_command": "/gpfs/home/tzhu/lammps-stable_5Jun2019/src/lmp_intel_cpu_intelmpi -pk intel 0 omp 2", - "model_devi_group_size": 1, - "model_devi_machine": { - "machine_type": "lsf", - "hostname" : "xxx.xxx.xxx.xxx", - "port" : 22, - "username": "tzhu", - "work_path" : "/gpfs/home/tzhu/jzzeng/dpgen_workdir", - "_comment" : "that's all" - }, - "model_devi_resources": { - "envs": { - "KMP_BLOCKTIME": 0 - }, - "source_list": [ "activate deepmd" ], - "numb_gpu": 1, - "numb_node": 1, - "node_cpu": 0, - "time_limit": false, - "partition": "newgpu", - "job_name": "dpgen_jzzeng", - "with_mpi": true, - "task_per_node": 1, - "_comment": "that's all" - }, - - "_comment": "fp on lsf //localhost ", - "fp_command": "/public/home/tzhu/g16/g16 < input", - "fp_group_size": 1, - "fp_machine": { - "machine_type": "pbs", - "hostname" : "xxx.xxx.xxx.xxx", - "port" : 2323, - "username": "tzhu", - "work_path" : "/public/home/tzhu/jzzeng/dpgen_workdir", - "_comment" : "that's all" - }, - "fp_resources": { - "node_cpu":28, - "numb_node": 1, - "job_name": "dpgen_jzzeng", - "task_per_node": 28, - "with_mpi": false, - "time_limit": "10:00:00", - "allow_failure": true, - "partition": "small", - "_comment": "that's all" + "api_version": "1.0", + "train": { + "machine": { + "batch_type": 
"lsf", + "context_type": "ssh", + "remote_profile": { + "hostname": "xxx.xxx.xxx.xxx", + "port": 22, + "username": "tzhu" + }, + "remote_root": "/gpfs/home/tzhu/jzzeng/dpgen_workdir", + "local_root": "./" + }, + "resources": { + "source_list": [ + "activate deepmd" + ], + "envs": { + "KMP_BLOCKTIME": 0, + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" + }, + "batch_type": "lsf", + "group_size": 1, + "number_node": 1, + "cpu_per_node": 0, + "gpu_per_node": 1, + "queue_name": "newgpu", + "kwargs": {} + }, + "command": "/gpfs/home/tzhu/anaconda3/envs/python3.6/bin/dp" }, - "_comment": " that's all " -} + "model_devi": { + "machine": { + "batch_type": "lsf", + "context_type": "ssh", + "remote_profile": { + "hostname": "xxx.xxx.xxx.xxx", + "port": 22, + "username": "tzhu" + }, + "remote_root": "/gpfs/home/tzhu/jzzeng/dpgen_workdir", + "local_root": "./" + }, + "resources": { + "envs": { + "KMP_BLOCKTIME": 0 + }, + "source_list": [ + "activate deepmd" + ], + "batch_type": "lsf", + "group_size": 1, + "number_node": 1, + "cpu_per_node": 0, + "gpu_per_node": 1, + "queue_name": "newgpu", + "kwargs": {} + }, + "command": "mpirun -n 0 /gpfs/home/tzhu/lammps-stable_5Jun2019/src/lmp_intel_cpu_intelmpi -pk intel 0 omp 2" + }, + "fp": { + "machine": { + "batch_type": "pbs", + "context_type": "ssh", + "remote_profile": { + "hostname": "xxx.xxx.xxx.xxx", + "port": 2323, + "username": "tzhu" + }, + "remote_root": "/public/home/tzhu/jzzeng/dpgen_workdir", + "local_root": "./" + }, + "resources": { + "batch_type": "pbs", + "group_size": 1, + "number_node": 1, + "cpu_per_node": 28, + "queue_name": "small", + "custom_flags": [ + "#PBS -l walltime=10:00:00" + ], + "kwargs": {}, + "gpu_per_node": 1 + }, + "command": "/public/home/tzhu/g16/g16 < input || :" + } +} \ No newline at end of file diff --git a/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json b/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json index 5f15303d6..2ff5b4a4b 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json +++ b/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json @@ -1,113 +1,112 @@ - { - "_comment" : "This is an example of DP-GEN on Slurm", - "_comment" : "Last updated on 2021.4.30 for DP-GEN 0.9.2 by Yuzhi Zhang", - "train" : - { - "_comment" : "Specify the installed path of DeePMD-kit", - "_comment" : "The version of DeePMD-kit should be 1.*", - "command": "PATH_TO_DEEPMD/dp", - "_comment" : "Specify machine settings", - "machine": { - "_comment" : "Supported batches include slurm, pbs, shell, lsf.", - "batch": "slurm", - "_comment" : "If your jobs are executed on a local workstation, you can let hostname be localhost.", - "_comment" : "Otherwise you should provide the IP of machine you want to connect via ssh.", - "hostname": "localhost", - "_comment" : "The port for connection, most common settings is 22", - "port": 22, - "_comment" : "Specify your username. Sometimes you may need specify password. Exactly the name of key is password. ", - "username": "USERNAME", - "_comment" : "Specify where you want your job executes, all of tasks will be sent to work_path on this machine.", - "_comment" : "You should alwasy make sure that directory of work_path exits. ", - "work_path": "PATH_TO_WORK", - "_comment": "that's all" - }, - "resources": { - "_comment" : "The number of nodes. This will generate #SBATCH -N 1 in your script. ", - "numb_node": 1, - "_comment" : "The number of GPU cards. #SBATCH --gres=gpu:1", - "numb_gpu": 1, - "_comment" : "The number of CPUs. 
#SBATCH -n 4", - "task_per_node": 4, - "_comment" : "Partition. #SBATCH -p all", - "partition": "all", - "_comment" : "Memory limit. #SBATCH --mem=16G", - "mem_limit": 16, - "_comment" : "Nodelist to be excluded. #SBATCH --exclude=gpu06,gpu07", - "exclude_list": [ - "gpu06", - "gpu07" - ], - "_comment" : "Environment to be activated. This will generate source PATH/train_new.env . ", - "source_list": [ - "PATH/train_new.env" - ], - "_comment" : " Module is a common tools on HPC clustes to manage softwares for multiple users.", - "_comment" : "Modules to be loaded. This will generate module load intel", - "module_list": ["intel"], - "_comment" : "Time limit. ", - "time_limit": "23:0:0", - "_comment": "that's all" - } +{ + "_comment": "Last updated on 2021.4.30 for DP-GEN 0.9.2 by Yuzhi Zhang", + "train": { + "_comment" : "Specify the installed path of DeePMD-kit", + "command": "PATH_TO_DEEPMD/dp", + "_comment" : "Specify machine settings", + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "localhost", + "_comment" : "The port for connection, most common settings is 22", + "port": 22, + "_comment" : "Specify your username.", + "username": "USERNAME" + }, + "_comment" : "You should alwasy make sure that directory of work_path exits. ", + "remote_root": "PATH_TO_WORK", + "local_root": "./" + }, + "resources": { + "_comment" : "Environment to be activated. This will generate source PATH/train_new.env . ", + "source_list": [ + "PATH/train_new.env" + ], + "_comment" : " Module is a common tools on HPC clustes to manage softwares for multiple users.", + "_comment" : "Modules to be loaded. This will generate module load intel", + "module_list": [ + "intel" + ], + "batch_type": "slurm", + "_comment" : "The number of nodes. This will generate #SBATCH -N 1 in your script. ", + "number_node": 1, + "_comment" : "The number of CPUs. #SBATCH -n 4", + "cpu_per_node": 4, + "_comment" : "The number of GPU cards. #SBATCH --gres=gpu:1", + "gpu_per_node": 1, + "queue_name": "all", + "custom_flags": [ + "#SBATCH -t 23:0:0", + "#SBATCH --mem=16G", + "#SBATCH --exclude=gpu06,gpu07" + ], + "kwargs": {}, + "group_size": 1 + } }, - - "model_devi": - { - "machine": { - "machine_type": "slurm", - "hostname": "localhost", - "port": 22, - "username": "USERNAME", - "work_path": "PATH_TO_WORK", - "_comment": "that's all" - }, - "resources": { - "numb_node": 1, - "numb_gpu": 1, - "task_per_node": 4, - "partition": "all", - "mem_limit": 16, - "exclude_list": [ - - ], - "source_list": [ - "PATH/lmp_new.env" - ], - "module_list": [], - "time_limit": "23:0:0", - "_comment": "that's all" - }, - "command": "lmp_serial", - "_comment" : "DP-GEN will put 5 tasks together in one submitting script.", - "group_size": 5 - }, - "fp": - { - "machine": { - "machine_type": "slurm", - "hostname": "xxx.xxx.xxx.xxx", - "port": 22, - "username": "USERNAME", - "work_path": "PATH_TO_WORK" - }, - "resources": { - "task_per_node": 8, - "numb_gpu": 0, - "exclude_list": [], - "_comment" : "If you set with_mpi to true, the defaulted parallelling command of Slurm, srun, will be appended as prefix.", - "_comment" : "If you do not want this, you can set with_mpi to false, and specify parallelling command yourself. ", - "_comment" : "Notice that in json format, the upper/lower case is strict. 
You should write true instead of True and false instead of False",
-        "with_mpi": false,
-        "source_list": [
-        ],
-        "module_list": [
-          "mpich/3.2.1-intel-2017.1"
-        ],
-        "time_limit": "120:0:0",
-        "partition": "C032M0128G",
-        "_comment": "that's all"
-      },
-      "command": "mpirun -n 8 /gpfs/share/home/1600017784/yuzhi/soft/QE-mpi/PW/src/pw.x < input",
-      "group_size": 1
-    }
-}
+    "model_devi": {
+        "machine": {
+            "batch_type": "slurm",
+            "context_type": "ssh",
+            "remote_profile": {
+                "hostname": "localhost",
+                "port": 22,
+                "username": "USERNAME"
+            },
+            "remote_root": "PATH_TO_WORK",
+            "local_root": "./"
+        },
+        "resources": {
+            "source_list": [
+                "PATH/lmp_new.env"
+            ],
+            "module_list": [],
+            "batch_type": "slurm",
+            "_comment": "DP-GEN will put 5 tasks together in one submitting script.",
+            "group_size": 5,
+            "number_node": 1,
+            "cpu_per_node": 4,
+            "gpu_per_node": 1,
+            "queue_name": "all",
+            "custom_flags": [
+                "#SBATCH -t 23:0:0",
+                "#SBATCH --mem=16G",
+                "#SBATCH --exclude="
+            ],
+            "kwargs": {}
+        },
+        "command": "lmp_serial"
+    },
+    "fp": {
+        "machine": {
+            "batch_type": "slurm",
+            "context_type": "ssh",
+            "remote_profile": {
+                "hostname": "xxx.xxx.xxx.xxx",
+                "port": 22,
+                "username": "USERNAME"
+            },
+            "remote_root": "PATH_TO_WORK",
+            "local_root": "./"
+        },
+        "resources": {
+            "source_list": [],
+            "module_list": [
+                "mpich/3.2.1-intel-2017.1"
+            ],
+            "batch_type": "slurm",
+            "group_size": 1,
+            "cpu_per_node": 8,
+            "gpu_per_node": 0,
+            "queue_name": "C032M0128G",
+            "custom_flags": [
+                "#SBATCH -t 120:0:0"
+            ],
+            "kwargs": {},
+            "number_node": 1
+        },
+        "command": "mpirun -n 8 /gpfs/share/home/1600017784/yuzhi/soft/QE-mpi/PW/src/pw.x < input"
+    },
+    "api_version": "1.0"
+}
\ No newline at end of file

From 9cace602491cff395128eb8020e3c3ffa9007e32 Mon Sep 17 00:00:00 2001
From: HuangJiameng <105633685+HuangJiameng@users.noreply.github.com>
Date: Fri, 1 Jul 2022 13:23:55 +0800
Subject: [PATCH 22/26] fix 'post_fp_cp2k', add param rfailed (#765)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix 'post_fp_cp2k', add param rfailed

* Update run.py
---
 dpgen/generator/run.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py
index 1bd196cc6..f716e2266 100644
--- a/dpgen/generator/run.py
+++ b/dpgen/generator/run.py
@@ -3342,7 +3342,10 @@ def post_fp_gaussian (iter_index,
 
 
 def post_fp_cp2k (iter_index,
-                  jdata):
+                  jdata,
+                  rfailed=None):
+
+    ratio_failed = rfailed if rfailed is not None else jdata.get('ratio_failed', 0.10)
 
     model_devi_jobs = jdata['model_devi_jobs']
     assert (iter_index < len(model_devi_jobs))
@@ -3373,7 +3376,7 @@ def post_fp_cp2k (iter_index,
     all_sys = None
     for oo in sys_output :
         _sys = dpdata.LabeledSystem(oo, fmt = 'cp2k/output')
-        _sys.check_type_map(type_map = jdata['type_map'])
+        #_sys.check_type_map(type_map = jdata['type_map'])
         if all_sys is None:
             all_sys = _sys
         else:
@@ -3385,8 +3388,12 @@ def post_fp_cp2k (iter_index,
             sys_data_path = os.path.join(work_path, 'data.%s'%ss)
             all_sys.to_deepmd_raw(sys_data_path)
             all_sys.to_deepmd_npy(sys_data_path, set_size = len(sys_output))
-    dlog.info("failed frame number: %s "%(tcount-icount))
-    dlog.info("total frame number: %s "%tcount)
+
+    rfail = float(tcount - icount) / float(tcount)
+    dlog.info("failed frame: %6d in %6d %6.2f %% " % (tcount - icount, tcount, rfail * 100.))
+
+    if rfail > ratio_failed:
+        raise RuntimeError("
Too many FP tasks are not converged. Please check your files in directories \'iter.*.*/02.fp/task.*.*/.\'") def post_fp_pwmat (iter_index, From 3cb8b901f5c1bc5bf16fd10139e1d4ba2719268e Mon Sep 17 00:00:00 2001 From: Zhuoyuan <75076820+ZLI-afk@users.noreply.github.com> Date: Mon, 4 Jul 2022 16:14:17 +0800 Subject: [PATCH 23/26] improve direction dictionary --- dpgen/auto_test/Gamma.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dpgen/auto_test/Gamma.py b/dpgen/auto_test/Gamma.py index d69d2ef6e..5fd1ea041 100644 --- a/dpgen/auto_test/Gamma.py +++ b/dpgen/auto_test/Gamma.py @@ -211,15 +211,15 @@ def return_direction(self): miller_str += str(self.miller_index[ii]) for ii in range(len(self.displace_direction)): direct_str += str(self.displace_direction[ii]) - search_key = miller_str + ':' + direct_str + search_key = miller_str + '/' + direct_str # define specific cell vectors dict_directions = { - '100:010': [(0,1,0), (0,0,1), (1,0,0)], - '110:111': [(-1,1,1), (1,-1,1), (1,1,0)], - '111:110': [(-1,1,0), (-1,-1,2), (1,1,1)], - '111:112': [(-1,-1,2), (1,-1,0), (1,1,1)], - '112:111': [(-1,-1,1), (1,-1,0), (1,1,2)], - '123:111': [(-1,-1,1), (2,-1,0), (1,2,3)] + '100/010': [(0,1,0), (0,0,1), (1,0,0)], + '110/111': [(-1,1,1), (1,-1,1), (1,1,0)], + '111/110': [(-1,1,0), (-1,-1,2), (1,1,1)], + '111/112': [(-1,-1,2), (1,-1,0), (1,1,1)], + '112/111': [(-1,-1,1), (1,-1,0), (1,1,2)], + '123/111': [(-1,-1,1), (2,-1,0), (1,2,3)] } try: directions = dict_directions[search_key] From 26823b76e5aa7934dd1a54116a4d2f8e8edd9517 Mon Sep 17 00:00:00 2001 From: Zhuoyuan <75076820+ZLI-afk@users.noreply.github.com> Date: Tue, 5 Jul 2022 14:56:48 +0800 Subject: [PATCH 24/26] fix typos --- dpgen/auto_test/Gamma.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/dpgen/auto_test/Gamma.py b/dpgen/auto_test/Gamma.py index 5fd1ea041..4d32f2f20 100644 --- a/dpgen/auto_test/Gamma.py +++ b/dpgen/auto_test/Gamma.py @@ -33,11 +33,7 @@ def __init__(self, self.miller_index = parameter['miller_index'] self.displace_direction = parameter['displace_direction'] self.lattice_type = parameter['lattice_type'] - - # parameter['min_slab_size'] = parameter.get('min_slab_size', 10) - # self.min_slab_size = parameter['min_slab_size'] parameter['min_supercell_size'] = parameter.get('min_supercell_size', (1,1,5)) - self.min_supercell_size = parameter['min_supercell_size'] parameter['min_vacuum_size'] = parameter.get('min_vacuum_size', 20) self.min_vacuum_size = parameter['min_vacuum_size'] @@ -192,15 +188,12 @@ def make_confs(self, return task_list @staticmethod - def centralize_slab(slab): + def centralize_slab(slab) -> None: z_pos_list = list(set([site.position[2] for site in slab])) z_pos_list.sort() central_atoms = (z_pos_list[-1] - z_pos_list[0])/2 - #print(f"central_atoms: {central_atoms}") central_cell = slab.cell[2][2]/2 - #print(f"central_cell: {central_cell}") disp_length = central_cell - central_atoms - #print(f"disp_length: {disp_length}") for site in slab: site.position[2] += disp_length From 479df751a588691cf64e5613ea9f6b9f84850da8 Mon Sep 17 00:00:00 2001 From: Zhuoyuan <75076820+ZLI-afk@users.noreply.github.com> Date: Tue, 12 Jul 2022 17:59:02 +0800 Subject: [PATCH 25/26] fix 111 directions and post bugs --- dpgen/auto_test/Gamma.py | 14 ++++++++------ .../confs/std-fcc/relaxation/relax_task/CONTCAR | 12 ++++++++++++ tests/auto_test/test_gamma.py | 8 ++++---- 3 files changed, 24 insertions(+), 10 deletions(-) create mode 100644 
tests/auto_test/confs/std-fcc/relaxation/relax_task/CONTCAR diff --git a/dpgen/auto_test/Gamma.py b/dpgen/auto_test/Gamma.py index 4d32f2f20..5cb3209f9 100644 --- a/dpgen/auto_test/Gamma.py +++ b/dpgen/auto_test/Gamma.py @@ -209,8 +209,8 @@ def return_direction(self): dict_directions = { '100/010': [(0,1,0), (0,0,1), (1,0,0)], '110/111': [(-1,1,1), (1,-1,1), (1,1,0)], - '111/110': [(-1,1,0), (-1,-1,2), (1,1,1)], - '111/112': [(-1,-1,2), (1,-1,0), (1,1,1)], + '111/110': [(-1,1,0), (1,1,-2), (1,1,1)], + '111/112': [(1,1,-2), (-1,1,0), (1,1,1)], '112/111': [(-1,-1,1), (1,-1,0), (1,1,2)], '123/111': [(-1,-1,1), (2,-1,0), (1,2,3)] } @@ -346,7 +346,8 @@ def _compute_lower(self, if not self.reprod: ptr_data += str(tuple(self.miller_index)) + ' plane along ' + str(self.displace_direction) - ptr_data += "No_task: \tDisplacement \tStacking_Fault_E(J/m^2) EpA(eV) equi_EpA(eV)\n" + ptr_data += "No_task: \tDisplacement \tStacking_Fault_E(J/m^2) EpA(eV) slab_equi_EpA(eV)\n" + task_result_slab_equi = loadfn(os.path.join(all_tasks[0], 'result_task.json')) for ii in all_tasks: task_result = loadfn(os.path.join(ii, 'result_task.json')) natoms = np.sum(task_result['atom_numbs']) @@ -356,14 +357,15 @@ def _compute_lower(self, equi_path = os.path.abspath(os.path.join(os.path.dirname(output_file), '../relaxation/relax_task')) equi_result = loadfn(os.path.join(equi_path, 'result.json')) equi_epa = equi_result['energies'][-1] / np.sum(equi_result['atom_numbs']) + equi_epa_slab = task_result_slab_equi['energies'][-1] / np.sum(equi_result['atom_numbs']) structure_dir = os.path.basename(ii) - Cf = 1.60217657e-16 / (1e-20 * 2) * 0.001 - sfe = (task_result['energies'][-1] - equi_epa * natoms) / AA * Cf + Cf = 1.60217657e-16 / 1e-20 * 0.001 + sfe = (task_result['energies'][-1] - equi_epa_slab * natoms) / AA * Cf miller_index = loadfn(os.path.join(ii, 'miller.json')) ptr_data += "%-25s %7.2f %7.3f %8.3f %8.3f\n" % ( - str(miller_index) + '-' + structure_dir + ':', int(ii[-4:])/self.n_steps, sfe, epa, equi_epa) + str(miller_index) + '-' + structure_dir + ':', int(ii[-4:])/self.n_steps, sfe, epa, equi_epa_slab) res_data[int(ii[-4:])/self.n_steps] = [sfe, epa, equi_epa] diff --git a/tests/auto_test/confs/std-fcc/relaxation/relax_task/CONTCAR b/tests/auto_test/confs/std-fcc/relaxation/relax_task/CONTCAR new file mode 100644 index 000000000..ce4af1e57 --- /dev/null +++ b/tests/auto_test/confs/std-fcc/relaxation/relax_task/CONTCAR @@ -0,0 +1,12 @@ +Mo4 +1.0 +4.0028914311881421e+00 0.0000000000000000e+00 0.0000000000000000e+00 +-3.3847683075468209e-17 4.0028914311881421e+00 0.0000000000000000e+00 +-2.3607623963217771e-17 -2.9295144479997265e-17 4.0028914311881421e+00 +Mo +4 +Cartesian + 4.0028914312 4.0028914312 4.0028914312 + 4.0028914312 2.0014457156 2.0014457156 + 2.0014457156 4.0028914312 2.0014457156 + 2.0014457156 2.0014457156 4.0028914312 diff --git a/tests/auto_test/test_gamma.py b/tests/auto_test/test_gamma.py index 46b0df2ce..26d7f4f54 100644 --- a/tests/auto_test/test_gamma.py +++ b/tests/auto_test/test_gamma.py @@ -25,7 +25,7 @@ class TestGamma(unittest.TestCase): def setUp(self): _jdata = { - "structures": ["confs/hp-Mo"], + "structures": ["confs/std-fcc"], "interaction": { "type": "vasp", "incar": "vasp_input/INCAR_Mo", @@ -35,9 +35,9 @@ def setUp(self): "properties": [ { "type": "gamma", - "lattice_type": "bcc", - "miller_index": [1, 1, 0], - "displace_direction": [1, 1, 1], + "lattice_type": "fcc", + "miller_index": [1, 1, 1], + "displace_direction": [1, 1, 2], "min_supercell_size": [1, 1, 10], 
"min_vacuum_size": 10, "add_fix": ["true", "true", "false"], From 37b2cdbb28c2c5294308722337b1c8e54c31f1df Mon Sep 17 00:00:00 2001 From: Zhuoyuan <75076820+ZLI-afk@users.noreply.github.com> Date: Tue, 12 Jul 2022 20:15:33 +0800 Subject: [PATCH 26/26] fix post bugs --- dpgen/auto_test/Gamma.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dpgen/auto_test/Gamma.py b/dpgen/auto_test/Gamma.py index 5cb3209f9..283195dae 100644 --- a/dpgen/auto_test/Gamma.py +++ b/dpgen/auto_test/Gamma.py @@ -347,21 +347,22 @@ def _compute_lower(self, if not self.reprod: ptr_data += str(tuple(self.miller_index)) + ' plane along ' + str(self.displace_direction) ptr_data += "No_task: \tDisplacement \tStacking_Fault_E(J/m^2) EpA(eV) slab_equi_EpA(eV)\n" + all_tasks.sort() task_result_slab_equi = loadfn(os.path.join(all_tasks[0], 'result_task.json')) for ii in all_tasks: task_result = loadfn(os.path.join(ii, 'result_task.json')) natoms = np.sum(task_result['atom_numbs']) epa = task_result['energies'][-1] / natoms + equi_epa_slab = task_result_slab_equi['energies'][-1] / natoms AA = np.linalg.norm(np.cross(task_result['cells'][0][0], task_result['cells'][0][1])) equi_path = os.path.abspath(os.path.join(os.path.dirname(output_file), '../relaxation/relax_task')) equi_result = loadfn(os.path.join(equi_path, 'result.json')) equi_epa = equi_result['energies'][-1] / np.sum(equi_result['atom_numbs']) - equi_epa_slab = task_result_slab_equi['energies'][-1] / np.sum(equi_result['atom_numbs']) structure_dir = os.path.basename(ii) Cf = 1.60217657e-16 / 1e-20 * 0.001 - sfe = (task_result['energies'][-1] - equi_epa_slab * natoms) / AA * Cf + sfe = (task_result['energies'][-1] - task_result_slab_equi['energies'][-1]) / AA * Cf miller_index = loadfn(os.path.join(ii, 'miller.json')) ptr_data += "%-25s %7.2f %7.3f %8.3f %8.3f\n" % (