From f0d0701f3ef371890c80cdfe252f6e040e7ad39b Mon Sep 17 00:00:00 2001 From: shazj99 Date: Fri, 15 Oct 2021 08:03:08 +0800 Subject: [PATCH 1/3] autotest: fix early return and result checking (#574) Change-Id: I4430b2c2e20da6cb28640dbc6139e62bed5357a2 Co-authored-by: Zhengju Sha --- dpgen/auto_test/common_prop.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/dpgen/auto_test/common_prop.py b/dpgen/auto_test/common_prop.py index 00f439d37..92f802275 100644 --- a/dpgen/auto_test/common_prop.py +++ b/dpgen/auto_test/common_prop.py @@ -112,6 +112,7 @@ def run_property(confs, conf_dirs.sort() task_list = [] work_path_list = [] + multiple_ret = [] for ii in conf_dirs: sepline(ch=ii, screen=True) for jj in property_list: @@ -159,7 +160,7 @@ def run_property(confs, all_task = tmp_task_list run_tasks = util.collect_task(all_task, inter_type) if len(run_tasks) == 0: - return + continue else: ret = pool.apply_async(worker, (work_path, all_task, @@ -169,23 +170,13 @@ def run_property(confs, mdata, inter_type, )) - # run_tasks = [os.path.basename(ii) for ii in all_task] - # machine, resources, command, group_size = util.get_machine_info(mdata, inter_type) - # disp = make_dispatcher(machine, resources, work_path, run_tasks, group_size) - # disp.run_jobs(resources, - # command, - # work_path, - # run_tasks, - # group_size, - # forward_common_files, - # forward_files, - # backward_files, - # outlog='outlog', - # errlog='errlog') + multiple_ret.append(ret) pool.close() pool.join() - if ret.successful(): - print('finished') + for ii in range(len(multiple_ret)): + if not multiple_ret[ii].successful(): + raise RuntimeError("Job %d is not successful!" % ii) + print('%d jobs are finished' % len(multiple_ret)) def worker(work_path, From bb1277db9b248cd3803748cbf142a32e45073246 Mon Sep 17 00:00:00 2001 From: shazj99 Date: Fri, 15 Oct 2021 08:06:42 +0800 Subject: [PATCH 2/3] autotest: run relaxation tasks in parallel (#573) Change-Id: I9ef3f65729cc2e2f553ea7ac038e77d219ad4712 Co-authored-by: Zhengju Sha --- dpgen/auto_test/common_equi.py | 96 ++++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 33 deletions(-) diff --git a/dpgen/auto_test/common_equi.py b/dpgen/auto_test/common_equi.py index 437178309..ec4010c70 100644 --- a/dpgen/auto_test/common_equi.py +++ b/dpgen/auto_test/common_equi.py @@ -2,6 +2,7 @@ import os import warnings from monty.serialization import dumpfn +from multiprocessing import Pool import dpgen.auto_test.lib.crys as crys import dpgen.auto_test.lib.util as util @@ -111,6 +112,47 @@ def make_equi(confs, inter.make_input_file(ii, 'relaxation', relax_param) +def worker(work_path, + run_task, + forward_common_files, + forward_files, + backward_files, + mdata, + inter_type): + machine, resources, command, group_size = util.get_machine_info(mdata, inter_type) + disp = make_dispatcher(machine, resources, work_path, [run_task], group_size) + print("%s --> Runing... " % (work_path)) + + api_version = mdata.get('api_version', '0.9') + if LooseVersion(api_version) < LooseVersion('1.0'): + warnings.warn(f"the dpdispatcher will be updated to new version." + f"And the interface may be changed. Please check the documents for more details") + disp.run_jobs(resources, + command, + work_path, + [run_task], + group_size, + forward_common_files, + forward_files, + backward_files, + outlog='outlog', + errlog='errlog') + elif LooseVersion(api_version) >= LooseVersion('1.0'): + submission = make_submission( + mdata_machine=machine, + mdata_resource=resources, + commands=[command], + work_path=work_path, + run_tasks=run_task, + group_size=group_size, + forward_common_files=forward_common_files, + forward_files=forward_files, + backward_files=backward_files, + outlog='outlog', + errlog='errlog' + ) + submission.run_submission() + def run_equi(confs, inter_param, mdata): @@ -120,6 +162,11 @@ def run_equi(confs, for conf in confs: conf_dirs.extend(glob.glob(conf)) conf_dirs.sort() + + processes = len(conf_dirs) + pool = Pool(processes=processes) + print("Submit job via %d processes" % processes) + # generate a list of task names like mp-xxx/relaxation/relax_task # ... work_path_list = [] @@ -150,45 +197,28 @@ def run_equi(confs, if len(run_tasks) == 0: return else: - # if LooseVersion() run_tasks = [os.path.basename(ii) for ii in all_task] machine, resources, command, group_size = util.get_machine_info(mdata, inter_type) print('%d tasks will be submited '%len(run_tasks)) + multiple_ret = [] for ii in range(len(work_path_list)): work_path = work_path_list[ii] - disp = make_dispatcher(machine, resources, work_path, [run_tasks[ii]], group_size) - print("%s --> Runing... "%(work_path)) - - api_version = mdata.get('api_version', '0.9') - if LooseVersion(api_version) < LooseVersion('1.0'): - warnings.warn(f"the dpdispatcher will be updated to new version." - f"And the interface may be changed. Please check the documents for more details") - disp.run_jobs(resources, - command, - work_path, - [run_tasks[ii]], - group_size, - forward_common_files, - forward_files, - backward_files, - outlog='outlog', - errlog='errlog') - elif LooseVersion(api_version) >= LooseVersion('1.0'): - submission = make_submission( - mdata_machine=machine, - mdata_resource=resources, - commands=[command], - work_path=work_path, - run_tasks=run_tasks, - group_size=group_size, - forward_common_files=forward_common_files, - forward_files=forward_files, - backward_files=backward_files, - outlog = 'outlog', - errlog = 'errlog' - ) - submission.run_submission() + ret = pool.apply_async(worker, (work_path, + run_tasks[ii], + forward_common_files, + forward_files, + backward_files, + mdata, + inter_type, + )) + multiple_ret.append(ret) + pool.close() + pool.join() + for ii in range(len(multiple_ret)): + if not multiple_ret[ii].successful(): + raise RuntimeError("Task %d is not successful! work_path: %s " % (ii, work_path_list[ii])) + print('finished') def post_equi(confs, inter_param): # find all POSCARs and their name like mp-xxx From 0cb32d6df80d65021cd59b963eec032c8b71383b Mon Sep 17 00:00:00 2001 From: shazj99 Date: Fri, 15 Oct 2021 08:07:22 +0800 Subject: [PATCH 3/3] autotest: rename task directory to avoid false result in a rerun (#572) Change-Id: Ia138ee7c31b6c41d9f41f5943affa9ebf8803c46 Co-authored-by: Zhengju Sha --- dpgen/auto_test/common_equi.py | 7 +++---- dpgen/auto_test/common_prop.py | 6 ++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/dpgen/auto_test/common_equi.py b/dpgen/auto_test/common_equi.py index ec4010c70..8754bb2e6 100644 --- a/dpgen/auto_test/common_equi.py +++ b/dpgen/auto_test/common_equi.py @@ -13,6 +13,8 @@ from distutils.version import LooseVersion from dpgen.dispatcher.Dispatcher import make_submission from dpgen.remote.decide_machine import convert_mdata +from dpgen.auto_test.lib.utils import create_path + lammps_task_type = ['deepmd', 'meam', 'eam_fs', 'eam_alloy'] @@ -78,10 +80,7 @@ def make_equi(confs, if not os.path.exists(poscar): raise FileNotFoundError('no configuration for autotest') relax_dirs = os.path.abspath(os.path.join(ii, 'relaxation', 'relax_task')) # to be consistent with property in make dispatcher - if os.path.exists(relax_dirs): - dlog.warning('%s already exists' % relax_dirs) - else: - os.makedirs(relax_dirs) + create_path(relax_dirs) task_dirs.append(relax_dirs) os.chdir(relax_dirs) # copy POSCARs to mp-xxx/relaxation/relax_task diff --git a/dpgen/auto_test/common_prop.py b/dpgen/auto_test/common_prop.py index 92f802275..ffb767c58 100644 --- a/dpgen/auto_test/common_prop.py +++ b/dpgen/auto_test/common_prop.py @@ -15,6 +15,7 @@ from dpgen.dispatcher.Dispatcher import make_dispatcher from dpgen.dispatcher.Dispatcher import make_submission from dpgen.remote.decide_machine import convert_mdata +from dpgen.auto_test.lib.utils import create_path lammps_task_type = ['deepmd', 'meam', 'eam_fs', 'eam_alloy'] @@ -73,10 +74,7 @@ def make_property(confs, path_to_equi = os.path.join(ii, 'relaxation', 'relax_task') path_to_work = os.path.join(ii, property_type + '_' + suffix) - if os.path.exists(path_to_work): - dlog.warning('%s already exists' % path_to_work) - else: - os.makedirs(path_to_work) + create_path(path_to_work) prop = make_property_instance(jj) task_list = prop.make_confs(path_to_work, path_to_equi, do_refine)