Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: refactor update_sel and save min_nbor_dist #3829

Merged
14 commits merged on May 31, 2024
Previous commit
Next commit
fix pt
Signed-off-by: Jinzhe Zeng <[email protected]>
njzjz committed May 28, 2024

Verified

This commit was signed with the committer’s verified signature.
macmv Neil Macneale V
commit a8d97c95d89379f4305c0dbc9d73a100054471d7
58 changes: 31 additions & 27 deletions deepmd/pt/entrypoints/main.py
Original file line number | Diff line number | Diff line change
@@ -260,45 +260,49 @@ def train(FLAGS):
config["training"]["training_data"], 0, type_map, None
)
config["model"], min_nbor_dist = BaseModel.update_sel(
config, config["model"]
train_data, type_map, config["model"]
)
else:
assert (
type_map is not None
), "Data stat in multi-task mode must have available type_map! "
train_data = None
for systems in config["training"]["data_dict"]:
tmp_data = get_data(
config["training"]["data_dict"][systems]["training_data"],
0,
type_map,
None,
)
tmp_data.get_batch()
assert tmp_data.get_type_map(), f"In multi-task mode, 'type_map.raw' must be defined in data systems {systems}! "
if train_data is None:
train_data = tmp_data
else:
train_data.system_dirs += tmp_data.system_dirs
train_data.data_systems += tmp_data.data_systems
train_data.natoms += tmp_data.natoms
train_data.natoms_vec += tmp_data.natoms_vec
train_data.default_mesh += tmp_data.default_mesh

training_jdata = deepcopy(config["training"])
training_jdata.pop("data_dict", {})
training_jdata.pop("model_prob", {})
# considering multi-task shares the descriptor, we need a minimal min_nbor_dist
min_nbor_dist = 0
for model_item in config["model"]["model_dict"]:
training_jdata = deepcopy(config["training"])
training_jdata.pop("data_dict", {})
training_jdata.pop("model_prob", {})

fake_global_jdata = {
"model": deepcopy(config["model"]["model_dict"][model_item]),
"training": deepcopy(config["training"]["data_dict"][model_item]),
}
fake_global_jdata["training"].update(training_jdata)

assert (
type_map is not None
), "Data stat in multi-task mode must have available type_map! "
train_data = None
for systems in fake_global_jdata["training"]["data_dict"]:
tmp_data = get_data(
fake_global_jdata["training"]["data_dict"][systems][
"training_data"
],
0,
type_map,
None,
)
tmp_data.get_batch()
assert tmp_data.get_type_map(), f"In multi-task mode, 'type_map.raw' must be defined in data systems {systems}! "
if train_data is None:
train_data = tmp_data
else:
train_data.system_dirs += tmp_data.system_dirs
train_data.data_systems += tmp_data.data_systems
train_data.natoms += tmp_data.natoms
train_data.natoms_vec += tmp_data.natoms_vec
train_data.default_mesh += tmp_data.default_mesh

config["model"]["model_dict"][model_item], min_nbor_dist_item = (
BaseModel.update_sel(
fake_global_jdata, config["model"]["model_dict"][model_item]
train_data, type_map, config["model"]["model_dict"][model_item]
)
)
if min_nbor_dist_item is not None: