From 38128666627c6d1dc6f56e55d2f9c7313a7902bf Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 28 Feb 2024 16:42:09 +0800 Subject: [PATCH 01/37] Fix single-task training&data stat --- deepmd/pt/model/descriptor/dpa2.py | 2 +- deepmd/pt/model/model/__init__.py | 9 ++++----- deepmd/pt/model/model/model.py | 4 ++-- deepmd/utils/path.py | 1 + examples/water/dpa2/input_torch.json | 8 ++------ examples/water/se_atten/input_torch.json | 2 ++ examples/water/se_e2_a/input_torch.json | 1 + 7 files changed, 13 insertions(+), 14 deletions(-) diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index e693116cf4..b1df56a004 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -304,7 +304,7 @@ def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None) } for item in merged ] - descrpt.compute_input_stats(merged_tmp) + descrpt.compute_input_stats(merged_tmp, path) def serialize(self) -> dict: """Serialize the obj to dict.""" diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py index 0dc9ae20af..b823a051f5 100644 --- a/deepmd/pt/model/model/__init__.py +++ b/deepmd/pt/model/model/__init__.py @@ -20,7 +20,7 @@ BaseDescriptor, ) from deepmd.pt.model.task import ( - Fitting, + BaseFitting, ) from .dp_model import ( @@ -61,7 +61,7 @@ def get_zbl_model(model_params): fitting_net["out_dim"] = descriptor.get_dim_emb() if "ener" in fitting_net["type"]: fitting_net["return_energy"] = True - fitting = Fitting(**fitting_net) + fitting = BaseFitting(**fitting_net) dp_model = DPAtomicModel(descriptor, fitting, type_map=model_params["type_map"]) # pairtab filepath = model_params["use_srtab"] @@ -97,9 +97,8 @@ def get_model(model_params): fitting_net["out_dim"] = descriptor.get_dim_emb() if "ener" in fitting_net["type"]: fitting_net["return_energy"] = True - fitting = Fitting(**fitting_net) - - model = EnergyModel(descriptor, fitting, type_map=model_params["type_map"]) + fitting = BaseFitting(**fitting_net) + model = DPModel(descriptor, fitting, type_map=model_params["type_map"]) model.model_def_script = json.dumps(model_params) return model diff --git a/deepmd/pt/model/model/model.py b/deepmd/pt/model/model/model.py index 0f5e27aea9..e32d2f307d 100644 --- a/deepmd/pt/model/model/model.py +++ b/deepmd/pt/model/model/model.py @@ -59,9 +59,9 @@ # in DPAtomicModel (and other classes), but this requires the developer aware # of it when developing it... class BaseModel(make_base_model()): - def __init__(self): + def __init__(self, *args, **kwargs): """Construct a basic model for different tasks.""" - super().__init__() + super().__init__(*args, **kwargs) def compute_or_load_stat( self, diff --git a/deepmd/utils/path.py b/deepmd/utils/path.py index c9a7cd8554..79361b6c23 100644 --- a/deepmd/utils/path.py +++ b/deepmd/utils/path.py @@ -355,6 +355,7 @@ def save_numpy(self, arr: np.ndarray) -> None: if self._name in self._keys: del self.root[self._name] self.root.create_dataset(self._name, data=arr) + self.root.flush() def glob(self, pattern: str) -> List["DPPath"]: """Search path using the glob pattern. 
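The `self.root.flush()` added to `DPPath.save_numpy` above guards the statistics cache: h5py buffers writes, so a dataset created for the cached stats may not reach disk before it is read back (or before the process exits). A minimal sketch of the save/load round trip, assuming a plain `h5py.File` rather than the `DPH5Path` wrapper used in `deepmd/utils/path.py`:

    import h5py
    import numpy as np

    def save_numpy(root: h5py.File, name: str, arr: np.ndarray) -> None:
        # Overwrite any stale dataset of the same name, then persist to disk.
        if name in root:
            del root[name]
        root.create_dataset(name, data=arr)
        root.flush()  # without this, the write can sit in h5py's buffer

    with h5py.File("stat.h5", "a") as f:
        save_numpy(f, "bias_atom_e", np.zeros(2))
        assert np.allclose(f["bias_atom_e"][...], 0.0)
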
diff --git a/examples/water/dpa2/input_torch.json b/examples/water/dpa2/input_torch.json index 9d783b35d5..108e75df62 100644 --- a/examples/water/dpa2/input_torch.json +++ b/examples/water/dpa2/input_torch.json @@ -1,18 +1,13 @@ { "_comment": "that's all", "model": { - "type_embedding": { - "neuron": [ - 8 - ], - "tebd_input_mode": "concat" - }, "type_map": [ "O", "H" ], "descriptor": { "type": "dpa2", + "tebd_dim": 8, "repinit_rcut": 9.0, "repinit_rcut_smth": 8.0, "repinit_nsel": 120, @@ -74,6 +69,7 @@ "_comment": " that's all" }, "training": { + "stat_file": "./dpa2", "training_data": { "systems": [ "../data/data_0", diff --git a/examples/water/se_atten/input_torch.json b/examples/water/se_atten/input_torch.json index 7da3d64164..bc948cc2a0 100644 --- a/examples/water/se_atten/input_torch.json +++ b/examples/water/se_atten/input_torch.json @@ -15,6 +15,7 @@ 50, 100 ], + "tebd_dim": 8, "axis_neuron": 16, "attn": 128, "attn_layer": 2, @@ -59,6 +60,7 @@ "_comment": " that's all" }, "training": { + "stat_file": "./dpa1", "training_data": { "systems": [ "../data/data_0", diff --git a/examples/water/se_e2_a/input_torch.json b/examples/water/se_e2_a/input_torch.json index 053a721a44..c686b49d45 100644 --- a/examples/water/se_e2_a/input_torch.json +++ b/examples/water/se_e2_a/input_torch.json @@ -51,6 +51,7 @@ "_comment": " that's all" }, "training": { + "stat_file": "./se_e2_a", "training_data": { "systems": [ "../data/data_0", From ae27607b38d7c0a1b9ed1b9c3219f2bfaa106d56 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 28 Feb 2024 17:12:03 +0800 Subject: [PATCH 02/37] Fix EnergyFittingNetDirect --- deepmd/pt/model/model/dp_model.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/deepmd/pt/model/model/dp_model.py b/deepmd/pt/model/model/dp_model.py index 5410f518d1..79c129334a 100644 --- a/deepmd/pt/model/model/dp_model.py +++ b/deepmd/pt/model/model/dp_model.py @@ -10,6 +10,7 @@ ) from deepmd.pt.model.task.ener import ( EnergyFittingNet, + EnergyFittingNetDirect, ) from deepmd.pt.model.task.polarizability import ( PolarFittingNet, @@ -36,7 +37,9 @@ def __new__(cls, descriptor, fitting, *args, **kwargs): # according to the fitting network to decide the type of the model if cls is DPModel: # map fitting to model - if isinstance(fitting, EnergyFittingNet): + if isinstance(fitting, EnergyFittingNet) or isinstance( + fitting, EnergyFittingNetDirect + ): cls = EnergyModel elif isinstance(fitting, DipoleFittingNet): cls = DipoleModel From f9265d5868ccc8c7c23d5ebb44aa4dbdc99063de Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 28 Feb 2024 20:39:47 +0800 Subject: [PATCH 03/37] Add data_requirement for dataloader --- .../descriptor/make_base_descriptor.py | 4 +- deepmd/dpmodel/model/base_model.py | 4 + deepmd/pt/entrypoints/main.py | 41 +-------- .../pt/model/atomic_model/dp_atomic_model.py | 17 ++-- deepmd/pt/model/descriptor/descriptor.py | 6 +- deepmd/pt/model/descriptor/dpa1.py | 6 +- deepmd/pt/model/descriptor/dpa2.py | 15 ++-- deepmd/pt/model/descriptor/hybrid.py | 16 ++-- deepmd/pt/model/descriptor/repformers.py | 16 +++- deepmd/pt/model/descriptor/se_a.py | 16 +++- deepmd/pt/model/descriptor/se_atten.py | 16 +++- deepmd/pt/model/descriptor/se_r.py | 16 +++- deepmd/pt/model/model/dipole_model.py | 20 +++++ deepmd/pt/model/model/dp_zbl_model.py | 37 ++++++++ deepmd/pt/model/model/ener_model.py | 37 ++++++++ deepmd/pt/model/model/polar_model.py | 20 +++++ deepmd/pt/model/task/dipole.py | 9 +- 
deepmd/pt/model/task/ener.py | 13 +-- deepmd/pt/train/training.py | 90 ++++++++++++------- deepmd/pt/train/wrapper.py | 1 + deepmd/pt/utils/dataloader.py | 22 +++++ deepmd/pt/utils/dataset.py | 19 +++- deepmd/pt/utils/stat.py | 36 +++----- source/tests/pt/model/test_dipole_fitting.py | 6 -- source/tests/pt/test_stat.py | 2 - 25 files changed, 327 insertions(+), 158 deletions(-) diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py b/deepmd/dpmodel/descriptor/make_base_descriptor.py index 18416ff16b..fe911551d5 100644 --- a/deepmd/dpmodel/descriptor/make_base_descriptor.py +++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py @@ -84,9 +84,7 @@ def mixed_types(self) -> bool: """ pass - def compute_input_stats( - self, merged: List[dict], path: Optional[DPPath] = None - ): + def compute_input_stats(self, merged: callable, path: Optional[DPPath] = None): """Update mean and stddev for descriptor elements.""" raise NotImplementedError diff --git a/deepmd/dpmodel/model/base_model.py b/deepmd/dpmodel/model/base_model.py index faf3e7cfff..c4b998d763 100644 --- a/deepmd/dpmodel/model/base_model.py +++ b/deepmd/dpmodel/model/base_model.py @@ -92,6 +92,10 @@ def is_aparam_nall(self) -> bool: def model_output_type(self) -> str: """Get the output type for the model.""" + @abstractmethod + def data_requirement(self) -> dict: + """Get the data requirement for the model.""" + @abstractmethod def serialize(self) -> dict: """Serialize the model. diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index a317cea6a9..c9eba6e579 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -47,9 +47,6 @@ from deepmd.pt.utils.multi_task import ( preprocess_shared_params, ) -from deepmd.pt.utils.stat import ( - make_stat_input, -) from deepmd.utils.path import ( DPPath, ) @@ -83,7 +80,6 @@ def get_trainer( multi_task=multi_task, model_branch=model_branch, ) - config["model"]["resuming"] = (finetune_model is not None) or (ckpt is not None) shared_links = None if multi_task: config["model"], shared_links = preprocess_shared_params(config["model"]) @@ -98,24 +94,6 @@ def prepare_trainer_input_single( validation_dataset_params = data_dict_single["validation_data"] training_systems = training_dataset_params["systems"] validation_systems = validation_dataset_params["systems"] - - # noise params - noise_settings = None - if loss_dict_single.get("type", "ener") == "denoise": - noise_settings = { - "noise_type": loss_dict_single.pop("noise_type", "uniform"), - "noise": loss_dict_single.pop("noise", 1.0), - "noise_mode": loss_dict_single.pop("noise_mode", "fix_num"), - "mask_num": loss_dict_single.pop("mask_num", 8), - "mask_prob": loss_dict_single.pop("mask_prob", 0.15), - "same_mask": loss_dict_single.pop("same_mask", False), - "mask_coord": loss_dict_single.pop("mask_coord", False), - "mask_type": loss_dict_single.pop("mask_type", False), - "max_fail_num": loss_dict_single.pop("max_fail_num", 10), - "mask_type_idx": len(model_params_single["type_map"]) - 1, - } - # noise_settings = None - # stat files stat_file_path_single = data_dict_single.get("stat_file", None) if stat_file_path_single is not None: @@ -140,29 +118,15 @@ def prepare_trainer_input_single( training_dataset_params["batch_size"], model_params_single, ) - sampled_single = None else: train_data_single = DpLoaderSet( training_systems, training_dataset_params["batch_size"], model_params_single, ) - data_stat_nbatch = model_params_single.get("data_stat_nbatch", 10) - sampled_single = make_stat_input( - 
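# eager statistics sampling, removed here at the entrypoint level:
# the Trainer now wraps make_stat_input in a lazy callable that only
# runs when no precomputed stat file exists.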
train_data_single.systems, - train_data_single.dataloaders, - data_stat_nbatch, - ) - if noise_settings is not None: - train_data_single = DpLoaderSet( - training_systems, - training_dataset_params["batch_size"], - model_params_single, - ) return ( train_data_single, validation_data_single, - sampled_single, stat_file_path_single, ) @@ -170,18 +134,16 @@ def prepare_trainer_input_single( ( train_data, validation_data, - sampled, stat_file_path, ) = prepare_trainer_input_single( config["model"], config["training"], config["loss"] ) else: - train_data, validation_data, sampled, stat_file_path = {}, {}, {}, {} + train_data, validation_data, stat_file_path = {}, {}, {} for model_key in config["model"]["model_dict"]: ( train_data[model_key], validation_data[model_key], - sampled[model_key], stat_file_path[model_key], ) = prepare_trainer_input_single( config["model"]["model_dict"][model_key], @@ -193,7 +155,6 @@ def prepare_trainer_input_single( trainer = training.Trainer( config, train_data, - sampled=sampled, stat_file_path=stat_file_path, validation_data=validation_data, init_model=init_model, diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index d2c1743d30..5c41499ace 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -18,9 +18,6 @@ from deepmd.pt.model.task.base_fitting import ( BaseFitting, ) -from deepmd.pt.utils.utils import ( - dict_to_device, -) from deepmd.utils.path import ( DPPath, ) @@ -170,7 +167,7 @@ def forward_atomic( def compute_or_load_stat( self, - sampled, + sampled_func, stat_file_path: Optional[DPPath] = None, ): """ @@ -183,8 +180,8 @@ def compute_or_load_stat( Parameters ---------- - sampled - The sampled data frames from different data systems. + sampled_func + The lazy sampled function to get data frames from different data systems. stat_file_path The dictionary of paths to the statistics files. 
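        Notes
        -----
        A sketch of the intended call site (this mirrors the lazy wrapping
        done in ``deepmd/pt/train/training.py`` in this patch set; names are
        illustrative):

            @lazy
            def get_sample():
                return make_stat_input(datasets, dataloaders, nbatches)

            model.compute_or_load_stat(
                sampled_func=get_sample,
                stat_file_path=stat_file_path,
            )

        The callable is only invoked when no precomputed statistics file is
        found, so repeated runs skip the expensive sampling.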
""" @@ -192,13 +189,9 @@ def compute_or_load_stat( # descriptors and fitting net with different type_map # should not share the same parameters stat_file_path /= " ".join(self.type_map) - for data_sys in sampled: - dict_to_device(data_sys) - if sampled is None: - sampled = [] - self.descriptor.compute_input_stats(sampled, stat_file_path) + self.descriptor.compute_input_stats(sampled_func, stat_file_path) if self.fitting_net is not None: - self.fitting_net.compute_output_stats(sampled, stat_file_path) + self.fitting_net.compute_output_stats(sampled_func, stat_file_path) @torch.jit.export def get_dim_fparam(self) -> int: diff --git a/deepmd/pt/model/descriptor/descriptor.py b/deepmd/pt/model/descriptor/descriptor.py index 964cdb01eb..d400e42f75 100644 --- a/deepmd/pt/model/descriptor/descriptor.py +++ b/deepmd/pt/model/descriptor/descriptor.py @@ -5,9 +5,11 @@ abstractmethod, ) from typing import ( + Callable, Dict, List, Optional, + Union, ) import torch @@ -86,7 +88,9 @@ def get_dim_emb(self) -> int: """Returns the embedding dimension.""" pass - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats( + self, merged: Union[Callable, List], path: Optional[DPPath] = None + ): """Update mean and stddev for DescriptorBlock elements.""" raise NotImplementedError diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index b616d20cd8..3e71ea4746 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -1,7 +1,9 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + Callable, List, Optional, + Union, ) import torch @@ -128,7 +130,9 @@ def dim_out(self): def dim_emb(self): return self.get_dim_emb() - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats( + self, merged: Union[Callable, List], path: Optional[DPPath] = None + ): return self.se_atten.compute_input_stats(merged, path) def serialize(self) -> dict: diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index b1df56a004..49c3f76631 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -1,7 +1,9 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + Callable, List, Optional, + Union, ) import torch @@ -295,16 +297,11 @@ def dim_emb(self): """Returns the embedding dimension g2.""" return self.get_dim_emb() - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats( + self, merged: Union[Callable, List], path: Optional[DPPath] = None + ): for ii, descrpt in enumerate([self.repinit, self.repformers]): - merged_tmp = [ - { - key: item[key] if not isinstance(item[key], list) else item[key][ii] - for key in item - } - for item in merged - ] - descrpt.compute_input_stats(merged_tmp, path) + descrpt.compute_input_stats(merged, path) def serialize(self) -> dict: """Serialize the obj to dict.""" diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py index 688d448b81..df45217949 100644 --- a/deepmd/pt/model/descriptor/hybrid.py +++ b/deepmd/pt/model/descriptor/hybrid.py @@ -1,7 +1,9 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + Callable, List, Optional, + Union, ) import torch @@ -157,17 +159,13 @@ def share_params(self, base_class, shared_level, resume=False): else: raise NotImplementedError - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): 
+ def compute_input_stats( + self, merged: Union[Callable, List], path: Optional[DPPath] = None + ): """Update mean and stddev for descriptor elements.""" for ii, descrpt in enumerate(self.descriptor_list): - merged_tmp = [ - { - key: item[key] if not isinstance(item[key], list) else item[key][ii] - for key in item - } - for item in merged - ] - descrpt.compute_input_stats(merged_tmp, path) + # need support for hybrid descriptors + descrpt.compute_input_stats(merged, path) def forward( self, diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index ad523bcc2d..000bed2cb1 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -1,8 +1,10 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + Callable, Dict, List, Optional, + Union, ) import torch @@ -278,12 +280,22 @@ def forward( return g1, g2, h2, rot_mat.view(-1, nloc, self.dim_emb, 3), sw - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats( + self, merged: Union[Callable, List], path: Optional[DPPath] = None + ): """Update mean and stddev for descriptor elements.""" env_mat_stat = EnvMatStatSe(self) if path is not None: path = path / env_mat_stat.get_hash() - env_mat_stat.load_or_compute_stats(merged, path) + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) self.stats = env_mat_stat.stats mean, stddev = env_mat_stat() if not self.set_davg_zero: diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index 6c29636d6d..ca2c5ef5c2 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -1,11 +1,13 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import itertools from typing import ( + Callable, ClassVar, Dict, List, Optional, Tuple, + Union, ) import numpy as np @@ -387,12 +389,22 @@ def __getitem__(self, key): else: raise KeyError(key) - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats( + self, merged: Union[Callable, List], path: Optional[DPPath] = None + ): """Update mean and stddev for descriptor elements.""" env_mat_stat = EnvMatStatSe(self) if path is not None: path = path / env_mat_stat.get_hash() - env_mat_stat.load_or_compute_stats(merged, path) + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) self.stats = env_mat_stat.stats mean, stddev = env_mat_stat() if not self.set_davg_zero: diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py index 0b32bd9341..60612f6046 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -1,8 +1,10 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + Callable, Dict, List, Optional, + Union, ) import numpy as np @@ -200,12 +202,22 @@ def dim_emb(self): """Returns the output dimension of embedding.""" return self.get_dim_emb() - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats( + self, merged: Union[Callable, List], path: Optional[DPPath] = None + ): """Update mean and stddev for descriptor elements.""" env_mat_stat = EnvMatStatSe(self) if 
path is not None: path = path / env_mat_stat.get_hash() - env_mat_stat.load_or_compute_stats(merged, path) + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) self.stats = env_mat_stat.stats mean, stddev = env_mat_stat() if not self.set_davg_zero: diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index bdb7dafe73..e8a2483da6 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -1,9 +1,11 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + Callable, Dict, List, Optional, Tuple, + Union, ) import numpy as np @@ -148,12 +150,22 @@ def mixed_types(self) -> bool: """ return False - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats( + self, merged: Union[Callable, List], path: Optional[DPPath] = None + ): """Update mean and stddev for descriptor elements.""" env_mat_stat = EnvMatStatSe(self) if path is not None: path = path / env_mat_stat.get_hash() - env_mat_stat.load_or_compute_stats(merged, path) + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) self.stats = env_mat_stat.stats mean, stddev = env_mat_stat() if not self.set_davg_zero: diff --git a/deepmd/pt/model/model/dipole_model.py b/deepmd/pt/model/model/dipole_model.py index 6629541459..f6d896b5d8 100644 --- a/deepmd/pt/model/model/dipole_model.py +++ b/deepmd/pt/model/model/dipole_model.py @@ -90,3 +90,23 @@ def forward_lower( else: model_predict = model_ret return model_predict + + @property + def data_requirement(self): + data_requirement = { + "dipole": { + "ndof": 3, + "atomic": False, + "must": False, + "high_prec": False, + "type_sel": self.get_sel_type(), + }, + "atomic_dipole": { + "ndof": 3, + "atomic": True, + "must": False, + "high_prec": False, + "type_sel": self.get_sel_type(), + }, + } + return data_requirement diff --git a/deepmd/pt/model/model/dp_zbl_model.py b/deepmd/pt/model/model/dp_zbl_model.py index c8264f2007..fd47b4368d 100644 --- a/deepmd/pt/model/model/dp_zbl_model.py +++ b/deepmd/pt/model/model/dp_zbl_model.py @@ -97,3 +97,40 @@ def forward_lower( model_predict["dforce"] = model_ret["dforce"] model_predict = model_ret return model_predict + + @property + def data_requirement(self): + data_requirement = { + "energy": { + "ndof": 1, + "atomic": False, + "must": False, + "high_prec": True, + }, + "force": { + "ndof": 3, + "atomic": True, + "must": False, + "high_prec": False, + }, + "virial": { + "ndof": 9, + "atomic": False, + "must": False, + "high_prec": False, + }, + "atom_ener": { + "ndof": 1, + "atomic": True, + "must": False, + "high_prec": False, + }, + "atom_pref": { + "ndof": 1, + "atomic": True, + "must": False, + "high_prec": False, + "repeat": 3, + }, + } + return data_requirement diff --git a/deepmd/pt/model/model/ener_model.py b/deepmd/pt/model/model/ener_model.py index 1a5706dbbf..1497cbade4 100644 --- a/deepmd/pt/model/model/ener_model.py +++ b/deepmd/pt/model/model/ener_model.py @@ -95,3 +95,40 @@ def forward_lower( else: model_predict = model_ret return model_predict + + @property + def data_requirement(self): + data_requirement = { + "energy": { + "ndof": 1, + "atomic": False, + "must": False, + "high_prec": True, + }, + "force": 
{ + "ndof": 3, + "atomic": True, + "must": False, + "high_prec": False, + }, + "virial": { + "ndof": 9, + "atomic": False, + "must": False, + "high_prec": False, + }, + "atom_ener": { + "ndof": 1, + "atomic": True, + "must": False, + "high_prec": False, + }, + "atom_pref": { + "ndof": 1, + "atomic": True, + "must": False, + "high_prec": False, + "repeat": 3, + }, + } + return data_requirement diff --git a/deepmd/pt/model/model/polar_model.py b/deepmd/pt/model/model/polar_model.py index d956a0344c..450f5f2fb5 100644 --- a/deepmd/pt/model/model/polar_model.py +++ b/deepmd/pt/model/model/polar_model.py @@ -74,3 +74,23 @@ def forward_lower( else: model_predict = model_ret return model_predict + + @property + def get_data_requirement(self): + data_requirement = { + "polar": { + "ndof": 9, + "atomic": False, + "must": False, + "high_prec": False, + "type_sel": self.get_sel_type(), + }, + "atomic_polar": { + "ndof": 9, + "atomic": True, + "must": False, + "high_prec": False, + "type_sel": self.get_sel_type(), + }, + } + return data_requirement diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index bff3dd93bc..5336e9ed5d 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -20,6 +20,9 @@ from deepmd.pt.utils.env import ( DEFAULT_PRECISION, ) +from deepmd.utils.path import ( + DPPath, +) log = logging.getLogger(__name__) @@ -67,7 +70,6 @@ class DipoleFittingNet(GeneralFitting): def __init__( self, - var_name: str, ntypes: int, dim_descrpt: int, embedding_width: int, @@ -89,7 +91,7 @@ def __init__( self.r_differentiable = r_differentiable self.c_differentiable = c_differentiable super().__init__( - var_name=var_name, + var_name="dipole", ntypes=ntypes, dim_descrpt=dim_descrpt, neuron=neuron, @@ -140,6 +142,9 @@ def data_stat_key(self): """ return [] + def compute_output_stats(self, merged, stat_file_path: Optional[DPPath] = None): + raise NotImplementedError + def forward( self, descriptor: torch.Tensor, diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index 8479111819..f00e4e01be 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -144,17 +144,18 @@ def data_stat_key(self): return ["bias_atom_e"] def compute_output_stats(self, merged, stat_file_path: Optional[DPPath] = None): - energy = [item["energy"] for item in merged] - data_mixed_type = "real_natoms_vec" in merged[0] - if data_mixed_type: - input_natoms = [item["real_natoms_vec"] for item in merged] - else: - input_natoms = [item["natoms"] for item in merged] if stat_file_path is not None: stat_file_path = stat_file_path / "bias_atom_e" if stat_file_path is not None and stat_file_path.is_file(): bias_atom_e = stat_file_path.load_numpy() else: + sampled = merged() + energy = [item["energy"] for item in sampled] + data_mixed_type = "real_natoms_vec" in sampled[0] + if data_mixed_type: + input_natoms = [item["real_natoms_vec"] for item in sampled] + else: + input_natoms = [item["natoms"] for item in sampled] bias_atom_e = compute_output_bias(energy, input_natoms, rcond=self.rcond) if stat_file_path is not None: stat_file_path.save_numpy(bias_atom_e) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 152c69a444..01a8d0ca28 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -38,6 +38,7 @@ from deepmd.pt.utils.dataloader import ( BufferedIterator, get_weighted_sampler, + lazy, ) from deepmd.pt.utils.env import ( DEVICE, @@ -49,10 +50,14 @@ from deepmd.pt.utils.learning_rate import ( 
LearningRateExp, ) +from deepmd.pt.utils.stat import ( + make_stat_input, +) if torch.__version__.startswith("2"): import torch._dynamo + import torch.distributed as dist from torch.nn.parallel import DistributedDataParallel as DDP from torch.utils.data import ( @@ -67,7 +72,6 @@ def __init__( self, config: Dict[str, Any], training_data, - sampled=None, stat_file_path=None, validation_data=None, init_model=None, @@ -82,7 +86,15 @@ def __init__( Args: - config: The Dict-like configuration with training options. """ - resume_model = init_model if init_model is not None else restart_model + if init_model is not None: + resume_model = init_model + elif restart_model is not None: + resume_model = restart_model + elif finetune_model is not None: + resume_model = finetune_model + else: + resume_model = None + resuming = resume_model is not None self.restart_training = restart_model is not None model_params = config["model"] training_params = config["training"] @@ -93,8 +105,8 @@ def __init__( self.model_keys = ( list(model_params["model_dict"]) if self.multi_task else ["Default"] ) - if self.multi_task and sampled is None: - sampled = {key: None for key in self.model_keys} + # if self.multi_task and sampled is None: + # sampled = {key: None for key in self.model_keys} self.rank = dist.get_rank() if dist.is_initialized() else 0 self.world_size = dist.get_world_size() if dist.is_initialized() else 1 self.num_model = len(self.model_keys) @@ -184,11 +196,26 @@ def get_data_loader(_training_data, _validation_data, _training_params): valid_numb_batch, ) - def get_single_model(_model_params, _sampled, _stat_file_path): + def get_single_model( + _model_params, _training_data, _validation_data, _stat_file_path + ): model = get_model(deepcopy(_model_params)).to(DEVICE) - if not model_params.get("resuming", False): + _training_data.add_data_requirement(model.data_requirement) + if _validation_data is not None: + _validation_data.add_data_requirement(model.data_requirement) + if not resuming: + + @lazy + def get_sample(): + sampled = make_stat_input( + _training_data.systems, + _training_data.dataloaders, + _model_params.get("data_stat_nbatch", 10), + ) + return sampled + model.compute_or_load_stat( - sampled=_sampled, + sampled_func=get_sample, stat_file_path=_stat_file_path, ) return model @@ -233,6 +260,9 @@ def get_loss(loss_params, start_lr, _ntypes): # Data + Model dp_random.seed(training_params["seed"]) if not self.multi_task: + self.model = get_single_model( + model_params, training_data, validation_data, stat_file_path + ) ( self.training_dataloader, self.training_data, @@ -240,7 +270,6 @@ def get_loss(loss_params, start_lr, _ntypes): self.validation_data, self.valid_numb_batch, ) = get_data_loader(training_data, validation_data, training_params) - self.model = get_single_model(model_params, sampled, stat_file_path) else: ( self.training_dataloader, @@ -251,6 +280,12 @@ def get_loss(loss_params, start_lr, _ntypes): self.model, ) = {}, {}, {}, {}, {}, {} for model_key in self.model_keys: + self.model[model_key] = get_single_model( + model_params["model_dict"][model_key], + training_data[model_key], + validation_data[model_key], + stat_file_path[model_key], + ) ( self.training_dataloader[model_key], self.training_data[model_key], @@ -262,11 +297,6 @@ def get_loss(loss_params, start_lr, _ntypes): validation_data[model_key], training_params["data_dict"][model_key], ) - self.model[model_key] = get_single_model( - model_params["model_dict"][model_key], - sampled[model_key], - stat_file_path[model_key], - 
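                    # note: each model is constructed before its dataloaders so that
                    # add_data_requirement() has already registered the label keys on
                    # the datasets by the time statistics batches are drawn lazily in
                    # compute_or_load_stat().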
) # Learning rate self.warmup_steps = training_params.get("warmup_steps", 0) @@ -309,7 +339,7 @@ def get_loss(loss_params, start_lr, _ntypes): # resuming and finetune optimizer_state_dict = None - if model_params["resuming"]: + if resuming: ntest = model_params.get("data_bias_nsample", 1) origin_model = ( finetune_model if finetune_model is not None else resume_model @@ -404,7 +434,7 @@ def get_loss(loss_params, start_lr, _ntypes): # Multi-task share params if shared_links is not None: - self.wrapper.share_params(shared_links, resume=model_params["resuming"]) + self.wrapper.share_params(shared_links, resume=resuming) if dist.is_initialized(): torch.cuda.set_device(LOCAL_RANK) @@ -812,28 +842,22 @@ def get_data(self, is_train=True, task_key="Default"): batch_data[key] = batch_data[key].to(DEVICE) else: batch_data[key] = [item.to(DEVICE) for item in batch_data[key]] - input_dict = {} - for item in [ + # we may need a better way to classify which are inputs and which are labels + # now wrapper only supports the following inputs: + input_keys = [ "coord", "atype", "box", - ]: - if item in batch_data: - input_dict[item] = batch_data[item] - else: - input_dict[item] = None + "spin", + ] + input_dict = {item_key: None for item_key in input_keys} label_dict = {} - for item in [ - "energy", - "force", - "virial", - "clean_coord", - "clean_type", - "coord_mask", - "type_mask", - ]: - if item in batch_data: - label_dict[item] = batch_data[item] + for item_key in batch_data: + if item_key in input_keys: + input_dict[item_key] = batch_data[item_key] + else: + if item_key not in ["sid", "fid"] and "find_" not in item_key: + label_dict[item_key] = batch_data[item_key] log_dict = {} if "fid" in batch_data: log_dict["fid"] = batch_data["fid"] diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py index 2207f111a0..ba9cd8c288 100644 --- a/deepmd/pt/train/wrapper.py +++ b/deepmd/pt/train/wrapper.py @@ -159,6 +159,7 @@ def forward( coord, atype, box: Optional[torch.Tensor] = None, + spin: Optional[torch.Tensor] = None, cur_lr: Optional[torch.Tensor] = None, label: Optional[torch.Tensor] = None, task_key: Optional[torch.Tensor] = None, diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 2125f9cdee..9d30748321 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -147,6 +147,11 @@ def __getitem__(self, idx): batch["sid"] = idx return batch + def add_data_requirement(self, dict_of_keys): + """Add data requirement for each system in multiple systems.""" + for system in self.systems: + system.add_data_requirement(dict_of_keys) + _sentinel = object() QUEUESIZE = 32 @@ -254,3 +259,20 @@ def get_weighted_sampler(training_data, prob_style, sys_prob=False): with torch.device("cpu"): sampler = WeightedRandomSampler(probs, len_sampler, replacement=True) return sampler + + +class LazyFunction: + def __init__(self, func): + self.func = func + self.result = None + self.called = False + + def __call__(self, *args, **kwargs): + if not self.called: + self.result = self.func(*args, **kwargs) + self.called = True + return self.result + + +def lazy(func): + return LazyFunction(func) diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index 4619b6417f..9de82778dc 100644 --- a/deepmd/pt/utils/dataset.py +++ b/deepmd/pt/utils/dataset.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later + from torch.utils.data import ( Dataset, ) @@ -27,9 +28,6 @@ def __init__( self._data_system = DeepmdData( sys_path=system, shuffle_test=shuffle, 
type_map=self._type_map ) - self._data_system.add("energy", 1, atomic=False, must=False, high_prec=True) - self._data_system.add("force", 3, atomic=True, must=False, high_prec=False) - self._data_system.add("virial", 9, atomic=False, must=False, high_prec=False) self.mixed_type = self._data_system.mixed_type self._ntypes = self._data_system.get_ntypes() self._natoms = self._data_system.get_natoms() @@ -43,3 +41,18 @@ def __getitem__(self, index): b_data = self._data_system.get_item_torch(index) b_data["natoms"] = self._natoms_vec return b_data + + def add_data_requirement(self, dict_of_keys): + """Add data requirement for this data system.""" + for data_key in dict_of_keys: + self._data_system.add( + data_key, + dict_of_keys[data_key]["ndof"], + atomic=dict_of_keys[data_key].get("atomic", False), + must=dict_of_keys[data_key].get("must", False), + high_prec=dict_of_keys[data_key].get("high_prec", False), + type_sel=dict_of_keys[data_key].get("type_sel", None), + repeat=dict_of_keys[data_key].get("repeat", 1), + default=dict_of_keys[data_key].get("default", 0.0), + dtype=dict_of_keys[data_key].get("dtype", None), + ) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 38f71d6994..661653b41e 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -4,6 +4,10 @@ import numpy as np import torch +from deepmd.pt.utils.utils import ( + dict_to_device, +) + log = logging.getLogger(__name__) @@ -19,19 +23,9 @@ def make_stat_input(datasets, dataloaders, nbatches): - a list of dicts, each of which contains data from a system """ lst = [] - keys = [ - "coord", - "force", - "energy", - "atype", - "box", - "natoms", - ] - if datasets[0].mixed_type: - keys.append("real_natoms_vec") log.info(f"Packing data for statistics from {len(datasets)} systems") for i in range(len(datasets)): - sys_stat = {key: [] for key in keys} + sys_stat = {} with torch.device("cpu"): iterator = iter(dataloaders[i]) for _ in range(nbatches): @@ -41,20 +35,16 @@ def make_stat_input(datasets, dataloaders, nbatches): iterator = iter(dataloaders[i]) stat_data = next(iterator) for dd in stat_data: - if dd in keys: + if isinstance(stat_data[dd], torch.Tensor): + if dd not in sys_stat: + sys_stat[dd] = [] sys_stat[dd].append(stat_data[dd]) - for key in keys: - if not isinstance(sys_stat[key][0], list): - if sys_stat[key][0] is None: - sys_stat[key] = None - else: - sys_stat[key] = torch.cat(sys_stat[key], dim=0) + for key in sys_stat: + if sys_stat[key][0] is None: + sys_stat[key] = None else: - sys_stat_list = [] - for ii, _ in enumerate(sys_stat[key][0]): - tmp_stat = [x[ii] for x in sys_stat[key]] - sys_stat_list.append(torch.cat(tmp_stat, dim=0)) - sys_stat[key] = sys_stat_list + sys_stat[key] = torch.cat(sys_stat[key], dim=0) + dict_to_device(sys_stat) lst.append(sys_stat) return lst diff --git a/source/tests/pt/model/test_dipole_fitting.py b/source/tests/pt/model/test_dipole_fitting.py index fcdd408726..83054f1042 100644 --- a/source/tests/pt/model/test_dipole_fitting.py +++ b/source/tests/pt/model/test_dipole_fitting.py @@ -79,7 +79,6 @@ def test_consistency( [0, 4], ): ft0 = DipoleFittingNet( - "foo", self.nt, self.dd0.dim_out, embedding_width=self.dd0.get_dim_emb(), @@ -132,7 +131,6 @@ def test_jit( [0, 4], ): ft0 = DipoleFittingNet( - "foo", self.nt, self.dd0.dim_out, embedding_width=self.dd0.get_dim_emb(), @@ -168,7 +166,6 @@ def test_rot(self): [0, 4], ): ft0 = DipoleFittingNet( - "foo", 3, # ntype self.dd0.dim_out, # dim_descrpt embedding_width=self.dd0.get_dim_emb(), @@ -218,7 +215,6 @@ def 
test_rot(self): def test_permu(self): coord = torch.matmul(self.coord, self.cell) ft0 = DipoleFittingNet( - "foo", 3, # ntype self.dd0.dim_out, embedding_width=self.dd0.get_dim_emb(), @@ -260,7 +256,6 @@ def test_trans(self): self.cell, ) ft0 = DipoleFittingNet( - "foo", 3, # ntype self.dd0.dim_out, embedding_width=self.dd0.get_dim_emb(), @@ -305,7 +300,6 @@ def setUp(self): self.atype = torch.IntTensor([0, 0, 0, 1, 1], device="cpu").to(env.DEVICE) self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) self.ft0 = DipoleFittingNet( - "dipole", self.nt, self.dd0.dim_out, embedding_width=self.dd0.get_dim_emb(), diff --git a/source/tests/pt/test_stat.py b/source/tests/pt/test_stat.py index 1e3c707d6f..7ef2d85e18 100644 --- a/source/tests/pt/test_stat.py +++ b/source/tests/pt/test_stat.py @@ -176,8 +176,6 @@ def test_descriptor(self): for sys in sampled: for key in [ "coord", - "force", - "energy", "atype", "natoms", "box", From c9eb767581a9de7f9837de918b53afa378bf9c8c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 28 Feb 2024 12:44:16 +0000 Subject: [PATCH 04/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/model/task/dipole.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index 000600cdf2..68ce09a080 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -134,11 +134,9 @@ def output_def(self) -> FittingOutputDef: ] ) - def compute_output_stats(self, merged, stat_file_path: Optional[DPPath] = None): raise NotImplementedError - def forward( self, descriptor: torch.Tensor, From 00105c7d548e297724d223d5667245d0e84102bf Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 28 Feb 2024 20:47:21 +0800 Subject: [PATCH 05/37] Update make_base_descriptor.py --- deepmd/dpmodel/descriptor/make_base_descriptor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py b/deepmd/dpmodel/descriptor/make_base_descriptor.py index fe911551d5..0f55916111 100644 --- a/deepmd/dpmodel/descriptor/make_base_descriptor.py +++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py @@ -4,8 +4,10 @@ abstractmethod, ) from typing import ( + Callable, List, Optional, + Union, ) from deepmd.common import ( @@ -84,7 +86,9 @@ def mixed_types(self) -> bool: """ pass - def compute_input_stats(self, merged: callable, path: Optional[DPPath] = None): + def compute_input_stats( + self, merged: Union[Callable, List], path: Optional[DPPath] = None + ): """Update mean and stddev for descriptor elements.""" raise NotImplementedError From 5a9df83fbf4c60f09706fe7fbc7e3553650cf5ad Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 28 Feb 2024 20:49:41 +0800 Subject: [PATCH 06/37] Update typing --- deepmd/dpmodel/descriptor/make_base_descriptor.py | 2 +- deepmd/pt/model/descriptor/descriptor.py | 2 +- deepmd/pt/model/descriptor/dpa1.py | 2 +- deepmd/pt/model/descriptor/dpa2.py | 2 +- deepmd/pt/model/descriptor/hybrid.py | 2 +- deepmd/pt/model/descriptor/repformers.py | 2 +- deepmd/pt/model/descriptor/se_a.py | 6 ++++-- deepmd/pt/model/descriptor/se_atten.py | 2 +- deepmd/pt/model/descriptor/se_r.py | 2 +- 9 files changed, 12 insertions(+), 10 deletions(-) diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py 
b/deepmd/dpmodel/descriptor/make_base_descriptor.py index 0f55916111..ab4c206fdf 100644 --- a/deepmd/dpmodel/descriptor/make_base_descriptor.py +++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py @@ -87,7 +87,7 @@ def mixed_types(self) -> bool: pass def compute_input_stats( - self, merged: Union[Callable, List], path: Optional[DPPath] = None + self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None ): """Update mean and stddev for descriptor elements.""" raise NotImplementedError diff --git a/deepmd/pt/model/descriptor/descriptor.py b/deepmd/pt/model/descriptor/descriptor.py index d400e42f75..02cd657c7c 100644 --- a/deepmd/pt/model/descriptor/descriptor.py +++ b/deepmd/pt/model/descriptor/descriptor.py @@ -89,7 +89,7 @@ def get_dim_emb(self) -> int: pass def compute_input_stats( - self, merged: Union[Callable, List], path: Optional[DPPath] = None + self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None ): """Update mean and stddev for DescriptorBlock elements.""" raise NotImplementedError diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index 3e71ea4746..1d8da3469f 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -131,7 +131,7 @@ def dim_emb(self): return self.get_dim_emb() def compute_input_stats( - self, merged: Union[Callable, List], path: Optional[DPPath] = None + self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None ): return self.se_atten.compute_input_stats(merged, path) diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index 49c3f76631..ee86d81fae 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -298,7 +298,7 @@ def dim_emb(self): return self.get_dim_emb() def compute_input_stats( - self, merged: Union[Callable, List], path: Optional[DPPath] = None + self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None ): for ii, descrpt in enumerate([self.repinit, self.repformers]): descrpt.compute_input_stats(merged, path) diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py index df45217949..a761204d64 100644 --- a/deepmd/pt/model/descriptor/hybrid.py +++ b/deepmd/pt/model/descriptor/hybrid.py @@ -160,7 +160,7 @@ def share_params(self, base_class, shared_level, resume=False): raise NotImplementedError def compute_input_stats( - self, merged: Union[Callable, List], path: Optional[DPPath] = None + self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None ): """Update mean and stddev for descriptor elements.""" for ii, descrpt in enumerate(self.descriptor_list): diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index 000bed2cb1..174daf14af 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -281,7 +281,7 @@ def forward( return g1, g2, h2, rot_mat.view(-1, nloc, self.dim_emb, 3), sw def compute_input_stats( - self, merged: Union[Callable, List], path: Optional[DPPath] = None + self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None ): """Update mean and stddev for descriptor elements.""" env_mat_stat = EnvMatStatSe(self) diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index ca2c5ef5c2..5843534e04 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -131,7 +131,9 @@ def dim_out(self): """Returns the output dimension of this 
descriptor.""" return self.sea.dim_out - def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None): + def compute_input_stats( + self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None + ): """Update mean and stddev for descriptor elements.""" return self.sea.compute_input_stats(merged, path) @@ -390,7 +392,7 @@ def __getitem__(self, key): raise KeyError(key) def compute_input_stats( - self, merged: Union[Callable, List], path: Optional[DPPath] = None + self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None ): """Update mean and stddev for descriptor elements.""" env_mat_stat = EnvMatStatSe(self) diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py index 60612f6046..a056fbe889 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -203,7 +203,7 @@ def dim_emb(self): return self.get_dim_emb() def compute_input_stats( - self, merged: Union[Callable, List], path: Optional[DPPath] = None + self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None ): """Update mean and stddev for descriptor elements.""" env_mat_stat = EnvMatStatSe(self) diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index e8a2483da6..e3832b16e4 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -151,7 +151,7 @@ def mixed_types(self) -> bool: return False def compute_input_stats( - self, merged: Union[Callable, List], path: Optional[DPPath] = None + self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None ): """Update mean and stddev for descriptor elements.""" env_mat_stat = EnvMatStatSe(self) From 75da5b1f8f6bb24ba38a4616d3ba952a52a035c9 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 28 Feb 2024 20:54:39 +0800 Subject: [PATCH 07/37] Update training.py --- deepmd/pt/train/training.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 01a8d0ca28..a7f0d9956a 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -105,8 +105,6 @@ def __init__( self.model_keys = ( list(model_params["model_dict"]) if self.multi_task else ["Default"] ) - # if self.multi_task and sampled is None: - # sampled = {key: None for key in self.model_keys} self.rank = dist.get_rank() if dist.is_initialized() else 0 self.world_size = dist.get_world_size() if dist.is_initialized() else 1 self.num_model = len(self.model_keys) From 6c171c5614ecc8508b63eca2632ac085a715e403 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 28 Feb 2024 21:48:01 +0800 Subject: [PATCH 08/37] Fix uts --- deepmd/dpmodel/model/dp_model.py | 4 ++- deepmd/pt/model/task/dipole.py | 10 ++++-- deepmd/pt/model/task/ener.py | 14 ++++++-- deepmd/pt/model/task/polarizability.py | 14 ++++++-- source/tests/pt/model/test_descriptor.py | 4 +++ source/tests/pt/model/test_dipole_fitting.py | 14 ++++---- source/tests/pt/model/test_embedding_net.py | 5 +++ .../pt/model/test_polarizability_fitting.py | 23 +++++------- source/tests/pt/test_loss.py | 4 +++ source/tests/pt/test_stat.py | 35 +++++++++++++++++++ 10 files changed, 99 insertions(+), 28 deletions(-) diff --git a/deepmd/dpmodel/model/dp_model.py b/deepmd/dpmodel/model/dp_model.py index 804ce51dfd..705750414b 100644 --- a/deepmd/dpmodel/model/dp_model.py +++ b/deepmd/dpmodel/model/dp_model.py @@ -14,4 +14,6 @@ # use "class" to resolve 
"Variable not allowed in type expression" @BaseModel.register("standard") class DPModel(make_model(DPAtomicModel), BaseModel): - pass + def data_requirement(self) -> dict: + """Get the data requirement for the model.""" + raise NotImplementedError diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index 68ce09a080..08a3673a8c 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -1,8 +1,10 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( + Callable, List, Optional, + Union, ) import torch @@ -91,7 +93,7 @@ def __init__( self.r_differentiable = r_differentiable self.c_differentiable = c_differentiable super().__init__( - var_name="dipole", + var_name="dipole" if "var_name" not in kwargs else kwargs.pop("var_name"), ntypes=ntypes, dim_descrpt=dim_descrpt, neuron=neuron, @@ -134,7 +136,11 @@ def output_def(self) -> FittingOutputDef: ] ) - def compute_output_stats(self, merged, stat_file_path: Optional[DPPath] = None): + def compute_output_stats( + self, + merged: Union[Callable, List[dict]], + stat_file_path: Optional[DPPath] = None, + ): raise NotImplementedError def forward( diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index ed9d517763..55ee79db25 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -2,9 +2,11 @@ import copy import logging from typing import ( + Callable, List, Optional, Tuple, + Union, ) import numpy as np @@ -138,13 +140,21 @@ def serialize(self) -> dict: data["atom_ener"] = self.atom_ener return data - def compute_output_stats(self, merged, stat_file_path: Optional[DPPath] = None): + def compute_output_stats( + self, + merged: Union[Callable, List[dict]], + stat_file_path: Optional[DPPath] = None, + ): if stat_file_path is not None: stat_file_path = stat_file_path / "bias_atom_e" if stat_file_path is not None and stat_file_path.is_file(): bias_atom_e = stat_file_path.load_numpy() else: - sampled = merged() + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged energy = [item["energy"] for item in sampled] data_mixed_type = "real_natoms_vec" in sampled[0] if data_mixed_type: diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index 1bc4798c48..0fe817084e 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( + Callable, List, Optional, Union, @@ -24,6 +25,9 @@ from deepmd.pt.utils.utils import ( to_numpy_array, ) +from deepmd.utils.path import ( + DPPath, +) log = logging.getLogger(__name__) @@ -72,7 +76,6 @@ class PolarFittingNet(GeneralFitting): def __init__( self, - var_name: str, ntypes: int, dim_descrpt: int, embedding_width: int, @@ -112,7 +115,7 @@ def __init__( ).view(ntypes, 1) self.shift_diag = shift_diag super().__init__( - var_name=var_name, + var_name="polar" if "var_name" not in kwargs else kwargs.pop("var_name"), ntypes=ntypes, dim_descrpt=dim_descrpt, neuron=neuron, @@ -160,6 +163,13 @@ def output_def(self) -> FittingOutputDef: ] ) + def compute_output_stats( + self, + merged: Union[Callable, List[dict]], + stat_file_path: Optional[DPPath] = None, + ): + raise NotImplementedError + def forward( self, descriptor: torch.Tensor, diff --git a/source/tests/pt/model/test_descriptor.py b/source/tests/pt/model/test_descriptor.py index ffad27201a..7d21d1c13d 100644 --- 
a/source/tests/pt/model/test_descriptor.py +++ b/source/tests/pt/model/test_descriptor.py @@ -38,6 +38,9 @@ op_module, ) +from ..test_stat import ( + energy_data_requirement, +) from .test_embedding_net import ( get_single_batch, ) @@ -114,6 +117,7 @@ def setUp(self): self.systems[0], model_config["type_map"], ) + ds.add_data_requirement(energy_data_requirement) self.np_batch, self.pt_batch = get_single_batch(ds) self.sec = np.cumsum(self.sel) self.ntypes = len(self.sel) diff --git a/source/tests/pt/model/test_dipole_fitting.py b/source/tests/pt/model/test_dipole_fitting.py index 83054f1042..fa4be9171c 100644 --- a/source/tests/pt/model/test_dipole_fitting.py +++ b/source/tests/pt/model/test_dipole_fitting.py @@ -114,12 +114,12 @@ def test_consistency( ) ret2 = ft2(rd0, atype, gr, fparam=ifp, aparam=iap) np.testing.assert_allclose( - to_numpy_array(ret0["foo"]), - ret1["foo"], + to_numpy_array(ret0["dipole"]), + ret1["dipole"], ) np.testing.assert_allclose( - to_numpy_array(ret0["foo"]), - to_numpy_array(ret2["foo"]), + to_numpy_array(ret0["dipole"]), + to_numpy_array(ret2["dipole"]), ) def test_jit( @@ -206,7 +206,7 @@ def test_rot(self): ) ret0 = ft0(rd0, extended_atype, gr0, fparam=ifp, aparam=iap) - res.append(ret0["foo"]) + res.append(ret0["dipole"]) np.testing.assert_allclose( to_numpy_array(res[1]), to_numpy_array(torch.matmul(res[0], rmat)) @@ -241,7 +241,7 @@ def test_permu(self): ) ret0 = ft0(rd0, extended_atype, gr0, fparam=0, aparam=0) - res.append(ret0["foo"]) + res.append(ret0["dipole"]) np.testing.assert_allclose( to_numpy_array(res[0][:, idx_perm]), to_numpy_array(res[1]) @@ -281,7 +281,7 @@ def test_trans(self): ) ret0 = ft0(rd0, extended_atype, gr0, fparam=0, aparam=0) - res.append(ret0["foo"]) + res.append(ret0["dipole"]) np.testing.assert_allclose(to_numpy_array(res[0]), to_numpy_array(res[1])) diff --git a/source/tests/pt/model/test_embedding_net.py b/source/tests/pt/model/test_embedding_net.py index 87e8a97444..a1895718dd 100644 --- a/source/tests/pt/model/test_embedding_net.py +++ b/source/tests/pt/model/test_embedding_net.py @@ -39,6 +39,10 @@ ) from deepmd.tf.descriptor import DescrptSeA as DescrptSeA_tf +from ..test_stat import ( + energy_data_requirement, +) + CUR_DIR = os.path.dirname(__file__) @@ -128,6 +132,7 @@ def setUp(self): self.systems[0], model_config["type_map"], ) + ds.add_data_requirement(energy_data_requirement) self.filter_neuron = model_config["descriptor"]["neuron"] self.axis_neuron = model_config["descriptor"]["axis_neuron"] self.np_batch, self.torch_batch = get_single_batch(ds) diff --git a/source/tests/pt/model/test_polarizability_fitting.py b/source/tests/pt/model/test_polarizability_fitting.py index f76a9e28ac..3b55f8bc05 100644 --- a/source/tests/pt/model/test_polarizability_fitting.py +++ b/source/tests/pt/model/test_polarizability_fitting.py @@ -67,7 +67,6 @@ def test_consistency( [None, self.scale], ): ft0 = PolarFittingNet( - "foo", self.nt, self.dd0.dim_out, embedding_width=self.dd0.get_dim_emb(), @@ -113,16 +112,16 @@ def test_consistency( aparam=to_numpy_array(iap), ) np.testing.assert_allclose( - to_numpy_array(ret0["foo"]), - ret1["foo"], + to_numpy_array(ret0["polar"]), + ret1["polar"], ) np.testing.assert_allclose( - to_numpy_array(ret0["foo"]), - to_numpy_array(ret2["foo"]), + to_numpy_array(ret0["polar"]), + to_numpy_array(ret2["polar"]), ) np.testing.assert_allclose( - to_numpy_array(ret0["foo"]), - ret3["foo"], + to_numpy_array(ret0["polar"]), + ret3["polar"], ) def test_jit( @@ -135,7 +134,6 @@ def test_jit( [True, False], 
): ft0 = PolarFittingNet( - "foo", self.nt, self.dd0.dim_out, embedding_width=self.dd0.get_dim_emb(), @@ -177,7 +175,6 @@ def test_rot(self): [None, self.scale], ): ft0 = PolarFittingNet( - "foo", self.nt, self.dd0.dim_out, # dim_descrpt embedding_width=self.dd0.get_dim_emb(), @@ -220,7 +217,7 @@ def test_rot(self): ) ret0 = ft0(rd0, extended_atype, gr0, fparam=ifp, aparam=iap) - res.append(ret0["foo"]) + res.append(ret0["polar"]) np.testing.assert_allclose( to_numpy_array(res[1]), to_numpy_array( @@ -235,7 +232,6 @@ def test_permu(self): coord = torch.matmul(self.coord, self.cell) for fit_diag, scale in itertools.product([True, False], [None, self.scale]): ft0 = PolarFittingNet( - "foo", self.nt, self.dd0.dim_out, embedding_width=self.dd0.get_dim_emb(), @@ -264,7 +260,7 @@ def test_permu(self): ) ret0 = ft0(rd0, extended_atype, gr0, fparam=None, aparam=None) - res.append(ret0["foo"]) + res.append(ret0["polar"]) np.testing.assert_allclose( to_numpy_array(res[0][:, idx_perm]), @@ -281,7 +277,6 @@ def test_trans(self): ) for fit_diag, scale in itertools.product([True, False], [None, self.scale]): ft0 = PolarFittingNet( - "foo", self.nt, self.dd0.dim_out, embedding_width=self.dd0.get_dim_emb(), @@ -309,7 +304,7 @@ def test_trans(self): ) ret0 = ft0(rd0, extended_atype, gr0, fparam=0, aparam=0) - res.append(ret0["foo"]) + res.append(ret0["polar"]) np.testing.assert_allclose(to_numpy_array(res[0]), to_numpy_array(res[1])) diff --git a/source/tests/pt/test_loss.py b/source/tests/pt/test_loss.py index e117c7f05a..484d62a3ad 100644 --- a/source/tests/pt/test_loss.py +++ b/source/tests/pt/test_loss.py @@ -28,6 +28,9 @@ from .model.test_embedding_net import ( get_single_batch, ) +from .test_stat import ( + energy_data_requirement, +) CUR_DIR = os.path.dirname(__file__) @@ -47,6 +50,7 @@ def get_batch(): if isinstance(systems, str): systems = expand_sys_str(systems) dataset = DeepmdDataSetForLoader(systems[0], model_config["type_map"]) + dataset.add_data_requirement(energy_data_requirement) np_batch, pt_batch = get_single_batch(dataset) return np_batch, pt_batch diff --git a/source/tests/pt/test_stat.py b/source/tests/pt/test_stat.py index 318b2e042f..54810fcc8f 100644 --- a/source/tests/pt/test_stat.py +++ b/source/tests/pt/test_stat.py @@ -47,6 +47,40 @@ CUR_DIR = os.path.dirname(__file__) +energy_data_requirement = { + "energy": { + "ndof": 1, + "atomic": False, + "must": False, + "high_prec": True, + }, + "force": { + "ndof": 3, + "atomic": True, + "must": False, + "high_prec": False, + }, + "virial": { + "ndof": 9, + "atomic": False, + "must": False, + "high_prec": False, + }, + "atom_ener": { + "ndof": 1, + "atomic": True, + "must": False, + "high_prec": False, + }, + "atom_pref": { + "ndof": 1, + "atomic": True, + "must": False, + "high_prec": False, + "repeat": 3, + }, +} + def compare(ut, base, given): if isinstance(base, list): @@ -111,6 +145,7 @@ def setUp(self): self.filter_neuron = model_config["descriptor"]["neuron"] self.axis_neuron = model_config["descriptor"]["axis_neuron"] self.n_neuron = model_config["fitting_net"]["neuron"] + self.my_dataset.add_data_requirement(energy_data_requirement) self.my_sampled = my_make( self.my_dataset.systems, self.my_dataset.dataloaders, self.data_stat_nbatch From 2e87e1d11150cb164f1bc2b08e86e2ca5954ce08 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 28 Feb 2024 22:47:41 +0800 Subject: [PATCH 09/37] Fix uts --- deepmd/pt/model/model/model.py | 4 ++++ source/tests/pt/model/test_model.py | 5 +++++ 
source/tests/pt/model/test_polarizability_fitting.py | 1 - 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/deepmd/pt/model/model/model.py b/deepmd/pt/model/model/model.py index e32d2f307d..0e2afadd14 100644 --- a/deepmd/pt/model/model/model.py +++ b/deepmd/pt/model/model/model.py @@ -84,3 +84,7 @@ def compute_or_load_stat( The path to the statistics files. """ raise NotImplementedError + + def data_requirement(self) -> dict: + """Get the data requirement for the model.""" + raise NotImplementedError diff --git a/source/tests/pt/model/test_model.py b/source/tests/pt/model/test_model.py index d8c7de39c3..69ec88f5d7 100644 --- a/source/tests/pt/model/test_model.py +++ b/source/tests/pt/model/test_model.py @@ -51,6 +51,10 @@ LearningRateExp, ) +from ..test_stat import ( + energy_data_requirement, +) + VariableState = collections.namedtuple("VariableState", ["value", "gradient"]) @@ -281,6 +285,7 @@ def test_consistency(self): "type_map": self.type_map, }, ) + my_ds.add_data_requirement(energy_data_requirement) my_model = get_model( model_params={ "descriptor": { diff --git a/source/tests/pt/model/test_polarizability_fitting.py b/source/tests/pt/model/test_polarizability_fitting.py index 3b55f8bc05..b1a5e3f730 100644 --- a/source/tests/pt/model/test_polarizability_fitting.py +++ b/source/tests/pt/model/test_polarizability_fitting.py @@ -323,7 +323,6 @@ def setUp(self): self.atype = torch.IntTensor([0, 0, 0, 1, 1], device="cpu") self.dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) self.ft0 = PolarFittingNet( - "polar", self.nt, self.dd0.dim_out, embedding_width=self.dd0.get_dim_emb(), From 2618d988aa9cf35f14e2259a84e99675e71c0da7 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 01:08:46 +0800 Subject: [PATCH 10/37] Support multi-task training --- .../descriptor/make_base_descriptor.py | 5 + deepmd/dpmodel/descriptor/se_e2_a.py | 4 + deepmd/dpmodel/descriptor/se_r.py | 4 + deepmd/pt/model/descriptor/__init__.py | 4 + deepmd/pt/model/descriptor/dpa1.py | 18 ++ deepmd/pt/model/descriptor/dpa2.py | 35 ++++ deepmd/pt/model/descriptor/hybrid.py | 2 - deepmd/pt/model/descriptor/se_a.py | 13 ++ deepmd/pt/model/task/fitting.py | 12 -- deepmd/pt/train/wrapper.py | 4 +- deepmd/pt/utils/multi_task.py | 101 ++++++---- source/tests/pt/model/water/multitask.json | 139 ++++++++++++++ source/tests/pt/test_multitask.py | 173 ++++++++++++++++++ source/tests/pt/test_training.py | 9 - 14 files changed, 462 insertions(+), 61 deletions(-) create mode 100644 source/tests/pt/model/water/multitask.json create mode 100644 source/tests/pt/test_multitask.py diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py b/deepmd/dpmodel/descriptor/make_base_descriptor.py index ab4c206fdf..db0611b184 100644 --- a/deepmd/dpmodel/descriptor/make_base_descriptor.py +++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py @@ -86,6 +86,11 @@ def mixed_types(self) -> bool: """ pass + @abstractmethod + def share_params(self, base_class, shared_level, resume=False): + """Share the parameters of self to the base_class with shared_level.""" + pass + def compute_input_stats( self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None ): diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py index b102933ac9..45dff13129 100644 --- a/deepmd/dpmodel/descriptor/se_e2_a.py +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -240,6 +240,10 @@ def mixed_types(self): """ return False + def share_params(self, base_class, 
shared_level, resume=False): + """Share the parameters of self to the base_class with shared_level.""" + raise NotImplementedError + def get_ntypes(self) -> int: """Returns the number of element types.""" return self.ntypes diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py index 5973c55353..f2f60ca6c1 100644 --- a/deepmd/dpmodel/descriptor/se_r.py +++ b/deepmd/dpmodel/descriptor/se_r.py @@ -200,6 +200,10 @@ def mixed_types(self): """ return False + def share_params(self, base_class, shared_level, resume=False): + """Share the parameters of self to the base_class with shared_level.""" + raise NotImplementedError + def get_ntypes(self) -> int: """Returns the number of element types.""" return self.ntypes diff --git a/deepmd/pt/model/descriptor/__init__.py b/deepmd/pt/model/descriptor/__init__.py index 5fd644f149..4796357faa 100644 --- a/deepmd/pt/model/descriptor/__init__.py +++ b/deepmd/pt/model/descriptor/__init__.py @@ -1,4 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from .base_descriptor import ( + BaseDescriptor, +) from .descriptor import ( DescriptorBlock, make_default_type_embedding, @@ -31,6 +34,7 @@ ) __all__ = [ + "BaseDescriptor", "DescriptorBlock", "make_default_type_embedding", "DescrptBlockSeA", diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index 1d8da3469f..c64f1e7f9a 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -122,6 +122,24 @@ def mixed_types(self) -> bool: """ return self.se_atten.mixed_types() + def share_params(self, base_class, shared_level, resume=False): + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" + # For DPA1 descriptors, the user-defined share-level + # shared_level: 0 + # share all parameters in both type_embedding and se_atten + if shared_level == 0: + self._modules["type_embedding"] = base_class._modules["type_embedding"] + self.se_atten.share_params(base_class.se_atten, 0, resume=resume) + # shared_level: 1 + # share all parameters in type_embedding + elif shared_level == 1: + self._modules["type_embedding"] = base_class._modules["type_embedding"] + # Other shared levels + else: + raise NotImplementedError + @property def dim_out(self): return self.get_dim_out() diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index ee86d81fae..69269b1b56 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -288,6 +288,41 @@ def mixed_types(self) -> bool: """ return True + def share_params(self, base_class, shared_level, resume=False): + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" 
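The share levels above are always driven from the follower model's side. A minimal, hedged sketch (the two `DescrptDPA1` objects and their common config are assumed to exist; only `share_params` and the `type_embedding` link come from this patch):

    # `base` and `other` are two DescrptDPA1 instances built from the same config
    other.share_params(base, shared_level=1)
    # shared_level 1 links only the type embedding, so both descriptors now
    # hold the very same submodule object:
    assert other._modules["type_embedding"] is base._modules["type_embedding"]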
+ # For DPA2 descriptors, the user-defined share-level + # shared_level: 0 + # share all parameters in type_embedding, repinit and repformers + if shared_level == 0: + self._modules["type_embedding"] = base_class._modules["type_embedding"] + self.repinit.share_params(base_class.repinit, 0, resume=resume) + self._modules["g1_shape_tranform"] = base_class._modules[ + "g1_shape_tranform" + ] + self.repformers.share_params(base_class.repformers, 0, resume=resume) + # shared_level: 1 + # share all parameters in type_embedding and repinit + elif shared_level == 1: + self._modules["type_embedding"] = base_class._modules["type_embedding"] + self.repinit.share_params(base_class.repinit, 0, resume=resume) + # shared_level: 2 + # share all parameters in type_embedding and repformers + elif shared_level == 2: + self._modules["type_embedding"] = base_class._modules["type_embedding"] + self._modules["g1_shape_tranform"] = base_class._modules[ + "g1_shape_tranform" + ] + self.repformers.share_params(base_class.repformers, 0, resume=resume) + # shared_level: 3 + # share all parameters in type_embedding + elif shared_level == 3: + self._modules["type_embedding"] = base_class._modules["type_embedding"] + # Other shared levels + else: + raise NotImplementedError + @property def dim_out(self): return self.get_dim_out() diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py index a761204d64..872f642ef5 100644 --- a/deepmd/pt/model/descriptor/hybrid.py +++ b/deepmd/pt/model/descriptor/hybrid.py @@ -154,8 +154,6 @@ def share_params(self, base_class, shared_level, resume=False): self.descriptor_list[ii].share_params( base_class.descriptor_list[ii], shared_level, resume=resume ) - if self.hybrid_mode == "sequential": - self.sequential_transform = base_class.sequential_transform else: raise NotImplementedError diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index 5843534e04..355c917dcf 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -126,6 +126,19 @@ def mixed_types(self): """ return self.sea.mixed_types() + def share_params(self, base_class, shared_level, resume=False): + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" + # For SeA descriptors, the user-defined share-level + # shared_level: 0 + # share all parameters in sea + if shared_level == 0: + self.sea.share_params(base_class.sea, 0, resume=resume) + # Other shared levels + else: + raise NotImplementedError + @property def dim_out(self): """Returns the output dimension of this descriptor.""" diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 20876d9be7..a964f0222d 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -77,18 +77,6 @@ def share_params(self, base_class, shared_level, resume=False): # the following will successfully link all the params except buffers, which need manually link. for item in self._modules: self._modules[item] = base_class._modules[item] - elif shared_level == 2: - # share all the layers before final layer - # the following will successfully link all the params except buffers, which need manually link. - self._modules["filter_layers"][0].deep_layers = base_class._modules[ - "filter_layers" - ][0].deep_layers - elif shared_level == 3: - # share the first layers - # the following will successfully link all the params except buffers, which need manually link. 
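A side note on the linking idiom these `share_params` implementations rely on: re-registering the same submodule in another module's `_modules` makes the parameters genuinely shared. A toy, pure-PyTorch illustration (not deepmd code):

    import torch

    shared = torch.nn.Linear(4, 4)
    a = torch.nn.Module()
    b = torch.nn.Module()
    a._modules["net"] = shared  # register the same object under both holders
    b._modules["net"] = shared
    # a single underlying tensor: an optimizer step through a.net also moves b.net
    assert a.net.weight is b.net.weight

Buffers such as the mean/stddev statistics are not covered by a loop over `_modules`, which is why they are linked by hand in the descriptor implementations.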
- self._modules["filter_layers"][0].deep_layers[0] = base_class._modules[ - "filter_layers" - ][0].deep_layers[0] else: raise NotImplementedError diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py index 49619b19ea..52cc636c10 100644 --- a/deepmd/pt/train/wrapper.py +++ b/deepmd/pt/train/wrapper.py @@ -61,7 +61,7 @@ def __init__( self.inference_only = self.loss is None def set_trainable_params(self): - supported_types = ["type_embedding", "descriptor", "fitting_net"] + supported_types = ["descriptor", "fitting_net"] for model_item in self.model: for net_type in supported_types: trainable = True @@ -83,7 +83,7 @@ def set_trainable_params(self): param.requires_grad = trainable def share_params(self, shared_links, resume=False): - supported_types = ["type_embedding", "descriptor", "fitting_net"] + supported_types = ["descriptor", "fitting_net"] for shared_item in shared_links: class_name = shared_links[shared_item]["type"] shared_base = shared_links[shared_item]["links"][0] diff --git a/deepmd/pt/utils/multi_task.py b/deepmd/pt/utils/multi_task.py index f97a826b03..ae3933a101 100644 --- a/deepmd/pt/utils/multi_task.py +++ b/deepmd/pt/utils/multi_task.py @@ -4,17 +4,10 @@ ) from deepmd.pt.model.descriptor import ( - DescrptDPA1, - DescrptDPA2, - DescrptSeA, -) -from deepmd.pt.model.network.network import ( - TypeEmbedNet, + BaseDescriptor, ) from deepmd.pt.model.task import ( - EnergyFittingNet, - EnergyFittingNetDirect, - FittingNetAttenLcc, + BaseFitting, ) @@ -37,9 +30,68 @@ def preprocess_shared_params(model_config): - "shared_level": Shared level (int) of this item in this model. Lower for more params to share, 0 means to share all params in this item. This list are sorted by "shared_level". + For example, if one has `model_config` like this: + "model": { + "shared_dict": { + "my_type_map": ["foo", "bar"], + "my_des1": { + "type": "se_e2_a", + "neuron": [10, 20, 40] + }, + }, + "model_dict": { + "model_1": { + "type_map": "my_type_map", + "descriptor": "my_des1", + "fitting_net": { + "neuron": [100, 100, 100] + } + }, + "model_2": { + "type_map": "my_type_map", + "descriptor": "my_des1", + "fitting_net": { + "neuron": [100, 100, 100] + } + } + "model_3": { + "type_map": "my_type_map", + "descriptor": "my_des1:1", + "fitting_net": { + "neuron": [100, 100, 100] + } + } + } + } + The above config will init three model branches named `model_1` and `model_2` and `model_3`, + in which: + - `model_2` and `model_3` will have the same `type_map` as that in `model_1`. + - `model_2` will share all the parameters of `descriptor` with `model_1`, + while `model_3` will share part of parameters of `descriptor` with `model_1` + on human-defined share-level `1` (default is `0`, meaning share all the parameters). + - `model_1`, `model_2` and `model_3` have three different `fitting_net`s. + The returned `model_config` will automatically fulfill the input `model_config` as if there's no sharing, + and the `shared_links` will keep all the sharing information with looking: + { + 'my_des1': { + 'type': 'DescrptSeA', + 'links': [ + {'model_key': 'model_1', + 'shared_type': 'descriptor', + 'shared_level': 0}, + {'model_key': 'model_2', + 'shared_type': 'descriptor', + 'shared_level': 0}, + {'model_key': 'model_3', + 'shared_type': 'descriptor', + 'shared_level': 1} + ] + } + } + """ assert "model_dict" in model_config, "only multi-task model can use this method!" 
- supported_types = ["type_map", "type_embedding", "descriptor", "fitting_net"] + supported_types = ["type_map", "descriptor", "fitting_net"] shared_dict = model_config.get("shared_dict", {}) shared_links = {} type_map_keys = [] @@ -98,32 +150,9 @@ def replace_one_item(params_dict, key_type, key_in_dict, suffix="", index=None): def get_class_name(item_key, item_params): - if item_key == "type_embedding": - return TypeEmbedNet.__name__ - elif item_key == "descriptor": - item_type = item_params.get("type", "se_e2_a") - if item_type == "se_e2_a": - return DescrptSeA.__name__ - elif item_type in ["se_atten", "dpa1"]: - return DescrptDPA1.__name__ - elif item_type in ["dpa2"]: - return DescrptDPA2.__name__ - # todo add support for other combination - # elif item_type == "gaussian_lcc": - # return DescrptGaussianLcc.__name__ - # elif item_type == "hybrid": - # return DescrptHybrid.__name__ - else: - raise RuntimeError(f"Unknown descriptor type {item_type}") + if item_key == "descriptor": + return BaseDescriptor.get_class_by_type(item_params.get("type", "se_e2_a")) elif item_key == "fitting_net": - item_type = item_params.get("type", "ener") - if item_type == "ener": - return EnergyFittingNet.__name__ - elif item_type in ["direct_force", "direct_force_ener"]: - return EnergyFittingNetDirect.__name__ - elif item_type == "atten_vec_lcc": - return FittingNetAttenLcc.__name__ - else: - raise RuntimeError(f"Unknown fitting_net type {item_type}") + return BaseFitting.get_class_by_type(item_params.get("type", "ener")) else: raise RuntimeError(f"Unknown class_name type {item_key}") diff --git a/source/tests/pt/model/water/multitask.json b/source/tests/pt/model/water/multitask.json new file mode 100644 index 0000000000..6baddd672b --- /dev/null +++ b/source/tests/pt/model/water/multitask.json @@ -0,0 +1,139 @@ +{ + "model": { + "shared_dict": { + "my_type_map": [ + "O", + "H", + "B" + ], + "my_descriptor": { + "type": "se_e2_a", + "sel": [ + 46, + 92 + ], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "seed": 1, + "_comment": " that's all" + }, + "_comment": "that's all" + }, + "model_dict": { + "model_1": { + "type_map": "my_type_map", + "descriptor": "my_descriptor", + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + } + }, + "model_2": { + "type_map": "my_type_map", + "descriptor": "my_descriptor", + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + } + } + } + }, + "learning_rate": { + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.0002, + "decay_rate": 0.98, + "stop_lr": 3.51e-08, + "_comment": "that's all" + }, + "loss_dict": { + "_comment": " that's all", + "model_1": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0 + }, + "model_2": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0 + } + }, + "training": { + "model_prob": { + "model_1": 0.5, + "model_2": 0.5 + }, + "data_dict": { + "model_1": { + "stat_file": "./stat_files/model_1", + "training_data": { + "systems": [ + "pt/water/data/data_0" + ], + "batch_size": 1, + "_comment": "that's all" + }, + "validation_data": { + "systems": [ + "pt/water/data/data_0" + ], + "batch_size": 1, + "_comment": "that's all" + } + }, + "model_2": { 
+ "stat_file": "./stat_files/model_2", + "training_data": { + "systems": [ + "pt/water/data/data_0" + ], + "batch_size": 1, + "_comment": "that's all" + }, + "validation_data": { + "systems": [ + "pt/water/data/data_0" + ], + "batch_size": 1, + "_comment": "that's all" + } + } + }, + "numb_steps": 100000, + "warmup_steps": 0, + "gradient_max_norm": 5.0, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 100, + "_comment": "that's all" + } +} diff --git a/source/tests/pt/test_multitask.py b/source/tests/pt/test_multitask.py new file mode 100644 index 0000000000..c7a2784367 --- /dev/null +++ b/source/tests/pt/test_multitask.py @@ -0,0 +1,173 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import shutil +import unittest +from copy import ( + deepcopy, +) +from pathlib import ( + Path, +) + +import torch + +from deepmd.pt.entrypoints.main import ( + get_trainer, +) + +from .model.test_permutation import ( + model_dpa1, + model_dpa2, + model_se_e2_a, +) + +multitask_template_json = str(Path(__file__).parent / "water/multitask.json") +with open(multitask_template_json) as f: + multitask_template = json.load(f) + + +class MultiTaskTrainTest: + def test_multitask_train(self): + trainer = get_trainer(deepcopy(self.config)) + trainer.run() + # check model keys + self.assertEqual(len(trainer.wrapper.model), 2) + self.assertTrue("model_1" in trainer.wrapper.model) + self.assertTrue("model_2" in trainer.wrapper.model) + + # check shared parameters + multi_state_dict = trainer.wrapper.model.state_dict() + for state_key in multi_state_dict: + if "model_1" in state_key: + self.assertTrue( + state_key.replace("model_1", "model_2") in multi_state_dict + ) + if "model_2" in state_key: + self.assertTrue( + state_key.replace("model_2", "model_1") in multi_state_dict + ) + if "model_1.descriptor" in state_key: + torch.testing.assert_allclose( + multi_state_dict[state_key], + multi_state_dict[state_key.replace("model_1", "model_2")], + ) + self.tearDown() + + def tearDown(self): + for f in os.listdir("."): + if f.startswith("model") and f.endswith(".pt"): + os.remove(f) + if f in ["lcurve.out"]: + os.remove(f) + if f in [self.stat_files]: + shutil.rmtree(f) + + +class TestMultiTaskSeA(unittest.TestCase, MultiTaskTrainTest): + def setUp(self): + multitask_se_e2_a = deepcopy(multitask_template) + multitask_se_e2_a["model"]["shared_dict"]["my_descriptor"] = model_se_e2_a[ + "descriptor" + ] + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.stat_files = "se_e2_a" + os.makedirs(self.stat_files, exist_ok=True) + self.config = multitask_se_e2_a + self.config["training"]["data_dict"]["model_1"]["training_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"][ + "stat_file" + ] = f"{self.stat_files}/model_1" + self.config["training"]["data_dict"]["model_2"]["training_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"][ + "stat_file" + ] = f"{self.stat_files}/model_2" + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + MultiTaskTrainTest.tearDown(self) + + +class TestMultiTaskDPA1(unittest.TestCase, MultiTaskTrainTest): + def setUp(self): + multitask_DPA1 = deepcopy(multitask_template) + 
multitask_DPA1["model"]["shared_dict"]["my_descriptor"] = model_dpa1[ + "descriptor" + ] + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.stat_files = "DPA1" + os.makedirs(self.stat_files, exist_ok=True) + self.config = multitask_DPA1 + self.config["training"]["data_dict"]["model_1"]["training_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"][ + "stat_file" + ] = f"{self.stat_files}/model_1" + self.config["training"]["data_dict"]["model_2"]["training_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"][ + "stat_file" + ] = f"{self.stat_files}/model_2" + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + MultiTaskTrainTest.tearDown(self) + + +class TestMultiTaskDPA2(unittest.TestCase, MultiTaskTrainTest): + def setUp(self): + multitask_DPA2 = deepcopy(multitask_template) + multitask_DPA2["model"]["shared_dict"]["my_descriptor"] = model_dpa2[ + "descriptor" + ] + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.stat_files = "DPA2" + os.makedirs(self.stat_files, exist_ok=True) + self.config = multitask_DPA2 + self.config["training"]["data_dict"]["model_1"]["training_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"][ + "stat_file" + ] = f"{self.stat_files}/model_1" + self.config["training"]["data_dict"]["model_2"]["training_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"][ + "stat_file" + ] = f"{self.stat_files}/model_2" + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + MultiTaskTrainTest.tearDown(self) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index 2186467788..4e73fc4f8a 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -79,15 +79,6 @@ def setUp(self): self.config["training"]["training_data"]["systems"] = data_file self.config["training"]["validation_data"]["systems"] = data_file self.config["model"] = deepcopy(model_dpa2) - self.config["model"]["descriptor"]["rcut"] = self.config["model"]["descriptor"][ - "repinit_rcut" - ] - self.config["model"]["descriptor"]["rcut_smth"] = self.config["model"][ - "descriptor" - ]["repinit_rcut_smth"] - self.config["model"]["descriptor"]["sel"] = self.config["model"]["descriptor"][ - "repinit_nsel" - ] self.config["training"]["numb_steps"] = 1 self.config["training"]["save_freq"] = 1 From f1585b2d7aa088b349bda8ec6916c410c9bc82cf Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 01:32:19 +0800 Subject: [PATCH 11/37] Take advice from QL scan --- source/tests/pt/test_multitask.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/source/tests/pt/test_multitask.py b/source/tests/pt/test_multitask.py index c7a2784367..0c1abf1f44 100644 --- a/source/tests/pt/test_multitask.py +++ b/source/tests/pt/test_multitask.py @@ -33,20 +33,16 @@ def 
test_multitask_train(self): trainer.run() # check model keys self.assertEqual(len(trainer.wrapper.model), 2) - self.assertTrue("model_1" in trainer.wrapper.model) - self.assertTrue("model_2" in trainer.wrapper.model) + self.assertIn("model_1", trainer.wrapper.model) + self.assertIn("model_2", trainer.wrapper.model) # check shared parameters multi_state_dict = trainer.wrapper.model.state_dict() for state_key in multi_state_dict: if "model_1" in state_key: - self.assertTrue( - state_key.replace("model_1", "model_2") in multi_state_dict - ) + self.assertIn(state_key.replace("model_1", "model_2"), multi_state_dict) if "model_2" in state_key: - self.assertTrue( - state_key.replace("model_2", "model_1") in multi_state_dict - ) + self.assertIn(state_key.replace("model_2", "model_1"), multi_state_dict) if "model_1.descriptor" in state_key: torch.testing.assert_allclose( multi_state_dict[state_key], From 463f9fbafaba27c0782678bb50e531d8b37e267f Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 01:33:47 +0800 Subject: [PATCH 12/37] Support no validation --- deepmd/pt/entrypoints/main.py | 19 ++++++--- deepmd/pt/train/training.py | 72 +++++++++++++++++++++-------------- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index c9eba6e579..340f783539 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -91,9 +91,12 @@ def prepare_trainer_input_single( type_split = False if model_params_single["descriptor"]["type"] in ["se_e2_a"]: type_split = True - validation_dataset_params = data_dict_single["validation_data"] + validation_dataset_params = data_dict_single.get("validation_data", None) + validation_systems = ( + validation_dataset_params["systems"] if validation_dataset_params else None + ) training_systems = training_dataset_params["systems"] - validation_systems = validation_dataset_params["systems"] + # stat files stat_file_path_single = data_dict_single.get("stat_file", None) if stat_file_path_single is not None: @@ -107,10 +110,14 @@ def prepare_trainer_input_single( stat_file_path_single = DPPath(stat_file_path_single, "a") # validation and training data - validation_data_single = DpLoaderSet( - validation_systems, - validation_dataset_params["batch_size"], - model_params_single, + validation_data_single = ( + DpLoaderSet( + validation_systems, + validation_dataset_params["batch_size"], + model_params_single, + ) + if validation_systems + else None ) if ckpt or finetune_model: train_data_single = DpLoaderSet( diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index a7f0d9956a..55609d6dc3 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -57,7 +57,6 @@ if torch.__version__.startswith("2"): import torch._dynamo - import torch.distributed as dist from torch.nn.parallel import DistributedDataParallel as DDP from torch.utils.data import ( @@ -142,20 +141,7 @@ def get_data_loader(_training_data, _validation_data, _training_params): else: train_sampler = get_weighted_sampler(_training_data, "prob_sys_size") - if "auto_prob" in _training_params["validation_data"]: - valid_sampler = get_weighted_sampler( - _validation_data, _training_params["validation_data"]["auto_prob"] - ) - elif "sys_probs" in _training_params["validation_data"]: - valid_sampler = get_weighted_sampler( - _validation_data, - _training_params["validation_data"]["sys_probs"], - sys_prob=True, - ) - else: - valid_sampler = 
get_weighted_sampler(_validation_data, "prob_sys_size") - - if train_sampler is None or valid_sampler is None: + if train_sampler is None: log.warning( "Sampler not specified!" ) # None sampler will lead to a premature stop iteration. Replacement should be True in attribute of the sampler to produce expected number of items in one iteration. @@ -169,22 +155,43 @@ def get_data_loader(_training_data, _validation_data, _training_params): ) with torch.device("cpu"): training_data_buffered = BufferedIterator(iter(training_dataloader)) - validation_dataloader = DataLoader( - _validation_data, - sampler=valid_sampler, - batch_size=None, - num_workers=min(NUM_WORKERS, 1), - drop_last=False, - pin_memory=True, - ) - - with torch.device("cpu"): - validation_data_buffered = BufferedIterator(iter(validation_dataloader)) - if _training_params.get("validation_data", None) is not None: - valid_numb_batch = _training_params["validation_data"].get( - "numb_btch", 1 + if _validation_data is not None: + if "auto_prob" in _training_params["validation_data"]: + valid_sampler = get_weighted_sampler( + _validation_data, + _training_params["validation_data"]["auto_prob"], + ) + elif "sys_probs" in _training_params["validation_data"]: + valid_sampler = get_weighted_sampler( + _validation_data, + _training_params["validation_data"]["sys_probs"], + sys_prob=True, + ) + else: + valid_sampler = get_weighted_sampler( + _validation_data, "prob_sys_size" + ) + validation_dataloader = DataLoader( + _validation_data, + sampler=valid_sampler, + batch_size=None, + num_workers=min(NUM_WORKERS, 1), + drop_last=False, + pin_memory=True, ) + with torch.device("cpu"): + validation_data_buffered = BufferedIterator( + iter(validation_dataloader) + ) + if _training_params.get("validation_data", None) is not None: + valid_numb_batch = _training_params["validation_data"].get( + "numb_btch", 1 + ) + else: + valid_numb_batch = 1 else: + validation_dataloader = None + validation_data_buffered = None valid_numb_batch = 1 return ( training_dataloader, @@ -645,6 +652,9 @@ def log_loss_valid(_task_key="Default"): input_dict, label_dict, _ = self.get_data( is_train=False, task_key=_task_key ) + if input_dict == {}: + # no validation data + return "", None _, loss, more_loss = self.wrapper( **input_dict, cur_lr=pref_lr, @@ -806,6 +816,8 @@ def get_data(self, is_train=True, task_key="Default"): ) batch_data = next(iter(self.training_data)) else: + if self.validation_data is None: + return {}, {}, {} try: batch_data = next(iter(self.validation_data)) except StopIteration: @@ -824,6 +836,8 @@ def get_data(self, is_train=True, task_key="Default"): ) batch_data = next(iter(self.training_data[task_key])) else: + if self.validation_data[task_key] is None: + return {}, {}, {} try: batch_data = next(iter(self.validation_data[task_key])) except StopIteration: From e8575affb1ced965eab3cfbd64d40ecbfaff607a Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 02:46:02 +0800 Subject: [PATCH 13/37] Update se_r.py --- deepmd/pt/model/descriptor/se_r.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index e3832b16e4..e16297b5e6 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -150,6 +150,34 @@ def mixed_types(self) -> bool: """ return False + def share_params(self, base_class, shared_level, resume=False): + assert ( + self.__class__ == base_class.__class__ + ), "Only 
descriptors of the same type can share params!" + # For SeR descriptors, the user-defined share-level + # shared_level: 0 + if shared_level == 0: + # link buffers + if hasattr(self, "mean") and not resume: + # in case of change params during resume + base_env = EnvMatStatSe(base_class) + base_env.stats = base_class.stats + for kk in base_class.get_stats(): + base_env.stats[kk] += self.get_stats()[kk] + mean, stddev = base_env() + if not base_class.set_davg_zero: + base_class.mean.copy_(torch.tensor(mean, device=env.DEVICE)) + base_class.stddev.copy_(torch.tensor(stddev, device=env.DEVICE)) + self.mean = base_class.mean + self.stddev = base_class.stddev + # self.load_state_dict(base_class.state_dict()) # this does not work, because it only inits the model + # the following will successfully link all the params except buffers + for item in self._modules: + self._modules[item] = base_class._modules[item] + # Other shared levels + else: + raise NotImplementedError + def compute_input_stats( self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None ): From 66d03b8754b05e816e55113cd4ff64eb4b3ae245 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 02:49:42 +0800 Subject: [PATCH 14/37] omit data prob log --- deepmd/pt/utils/dataloader.py | 2 +- deepmd/pt/utils/finetune.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 9d30748321..70993c21a0 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -253,7 +253,7 @@ def get_weighted_sampler(training_data, prob_style, sys_prob=False): probs = prob_sys_size_ext(style, len(training_data), training_data.index) else: probs = process_sys_probs(prob_style, training_data.index) - log.info("Generated weighted sampler with prob array: " + str(probs)) + log.debug("Generated weighted sampler with prob array: " + str(probs)) # training_data.total_batch is the size of one epoch, you can increase it to avoid too many rebuilding of iteraters len_sampler = training_data.total_batch * max(env.NUM_WORKERS, 1) with torch.device("cpu"): diff --git a/deepmd/pt/utils/finetune.py b/deepmd/pt/utils/finetune.py index 13749da151..b08dc9fbef 100644 --- a/deepmd/pt/utils/finetune.py +++ b/deepmd/pt/utils/finetune.py @@ -21,7 +21,8 @@ def change_finetune_model_params( """ if multi_task: # TODO - log.error("finetune mode need modification for multitask mode!") + pass + # log.error("finetune mode need modification for multitask mode!") if finetune_model is not None: state_dict = torch.load(finetune_model, map_location=env.DEVICE) if "model" in state_dict: From e9e0d95a61beeeef51d5f3bc24857487a4708ba6 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 02:51:00 +0800 Subject: [PATCH 15/37] omit seed log --- deepmd/pt/model/task/ener.py | 1 - deepmd/pt/model/task/fitting.py | 1 - 2 files changed, 2 deletions(-) diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index 55ee79db25..edd73009c5 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -331,7 +331,6 @@ def __init__( self.filter_layers = torch.nn.ModuleList(filter_layers) if "seed" in kwargs: - log.info("Set seed to %d in fitting net.", kwargs["seed"]) torch.manual_seed(kwargs["seed"]) def output_def(self): diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index a964f0222d..0afc718684 100644 --- 
a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -343,7 +343,6 @@ def __init__( self.filter_layers_old = None if seed is not None: - log.info("Set seed to %d in fitting net.", seed) torch.manual_seed(seed) def serialize(self) -> dict: From ab35653b5ad5c39cc72670814c516d6a8b8431ca Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 12:32:33 +0800 Subject: [PATCH 16/37] Add fparam and aparam --- deepmd/pt/train/training.py | 2 ++ deepmd/utils/env_mat_stat.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 55609d6dc3..6b62282817 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -861,6 +861,8 @@ def get_data(self, is_train=True, task_key="Default"): "atype", "box", "spin", + "fparam", + "aparam", ] input_dict = {item_key: None for item_key in input_keys} label_dict = {} diff --git a/deepmd/utils/env_mat_stat.py b/deepmd/utils/env_mat_stat.py index 2fa497b9b6..217c46844b 100644 --- a/deepmd/utils/env_mat_stat.py +++ b/deepmd/utils/env_mat_stat.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import logging from abc import ( ABC, abstractmethod, @@ -19,6 +20,8 @@ DPPath, ) +log = logging.getLogger(__name__) + class StatItem: """A class to store the statistics of the environment matrix. @@ -170,10 +173,12 @@ def load_or_compute_stats( """ if path is not None and path.is_dir(): self.load_stats(path) + log.info(f"Load stats from {path}.") else: self.compute_stats(data) if path is not None: self.save_stats(path) + log.info(f"Save stats to {path}.") def get_avg(self, default: float = 0) -> Dict[str, float]: """Get the average of the environment matrix. From 64d60797bd640c6e3af794a87e09a5038d491d47 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 12:37:32 +0800 Subject: [PATCH 17/37] Add type hint for `Callable` --- deepmd/dpmodel/descriptor/make_base_descriptor.py | 4 +++- deepmd/pt/model/descriptor/descriptor.py | 4 +++- deepmd/pt/model/descriptor/dpa1.py | 4 +++- deepmd/pt/model/descriptor/dpa2.py | 4 +++- deepmd/pt/model/descriptor/hybrid.py | 4 +++- deepmd/pt/model/descriptor/repformers.py | 4 +++- deepmd/pt/model/descriptor/se_a.py | 8 ++++++-- deepmd/pt/model/descriptor/se_atten.py | 4 +++- deepmd/pt/model/descriptor/se_r.py | 4 +++- deepmd/pt/model/task/dipole.py | 2 +- deepmd/pt/model/task/ener.py | 2 +- deepmd/pt/model/task/polarizability.py | 2 +- 12 files changed, 33 insertions(+), 13 deletions(-) diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py b/deepmd/dpmodel/descriptor/make_base_descriptor.py index db0611b184..9a954a7f0b 100644 --- a/deepmd/dpmodel/descriptor/make_base_descriptor.py +++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py @@ -92,7 +92,9 @@ def share_params(self, base_class, shared_level, resume=False): pass def compute_input_stats( - self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, ): """Update mean and stddev for descriptor elements.""" raise NotImplementedError diff --git a/deepmd/pt/model/descriptor/descriptor.py b/deepmd/pt/model/descriptor/descriptor.py index 02cd657c7c..778523a14d 100644 --- a/deepmd/pt/model/descriptor/descriptor.py +++ b/deepmd/pt/model/descriptor/descriptor.py @@ -89,7 +89,9 @@ def get_dim_emb(self) -> int: pass def compute_input_stats( - self, merged: Union[Callable, 
List[dict]], path: Optional[DPPath] = None + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, ): """Update mean and stddev for DescriptorBlock elements.""" raise NotImplementedError diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index c64f1e7f9a..7a65df3f11 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -149,7 +149,9 @@ def dim_emb(self): return self.get_dim_emb() def compute_input_stats( - self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, ): return self.se_atten.compute_input_stats(merged, path) diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index 69269b1b56..87319f29be 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -333,7 +333,9 @@ def dim_emb(self): return self.get_dim_emb() def compute_input_stats( - self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, ): for ii, descrpt in enumerate([self.repinit, self.repformers]): descrpt.compute_input_stats(merged, path) diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py index 872f642ef5..40ff3e5c7f 100644 --- a/deepmd/pt/model/descriptor/hybrid.py +++ b/deepmd/pt/model/descriptor/hybrid.py @@ -158,7 +158,9 @@ def share_params(self, base_class, shared_level, resume=False): raise NotImplementedError def compute_input_stats( - self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, ): """Update mean and stddev for descriptor elements.""" for ii, descrpt in enumerate(self.descriptor_list): diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index 174daf14af..14fdbc60cb 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -281,7 +281,9 @@ def forward( return g1, g2, h2, rot_mat.view(-1, nloc, self.dim_emb, 3), sw def compute_input_stats( - self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, ): """Update mean and stddev for descriptor elements.""" env_mat_stat = EnvMatStatSe(self) diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index 7a68d347dc..0fc32e6ac3 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -145,7 +145,9 @@ def dim_out(self): return self.sea.dim_out def compute_input_stats( - self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, ): """Update mean and stddev for descriptor elements.""" return self.sea.compute_input_stats(merged, path) @@ -405,7 +407,9 @@ def __getitem__(self, key): raise KeyError(key) def compute_input_stats( - self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, ): """Update mean and stddev for descriptor elements.""" env_mat_stat = EnvMatStatSe(self) diff --git a/deepmd/pt/model/descriptor/se_atten.py 
b/deepmd/pt/model/descriptor/se_atten.py index a056fbe889..9981a199de 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -203,7 +203,9 @@ def dim_emb(self): return self.get_dim_emb() def compute_input_stats( - self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, ): """Update mean and stddev for descriptor elements.""" env_mat_stat = EnvMatStatSe(self) diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index e16297b5e6..ad9b2ae9d9 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -179,7 +179,9 @@ def share_params(self, base_class, shared_level, resume=False): raise NotImplementedError def compute_input_stats( - self, merged: Union[Callable, List[dict]], path: Optional[DPPath] = None + self, + merged: Union[Callable[[], List[dict]], List[dict]], + path: Optional[DPPath] = None, ): """Update mean and stddev for descriptor elements.""" env_mat_stat = EnvMatStatSe(self) diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index 08a3673a8c..00de5276ee 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -138,7 +138,7 @@ def output_def(self) -> FittingOutputDef: def compute_output_stats( self, - merged: Union[Callable, List[dict]], + merged: Union[Callable[[], List[dict]], List[dict]], stat_file_path: Optional[DPPath] = None, ): raise NotImplementedError diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index edd73009c5..404b92a10a 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -142,7 +142,7 @@ def serialize(self) -> dict: def compute_output_stats( self, - merged: Union[Callable, List[dict]], + merged: Union[Callable[[], List[dict]], List[dict]], stat_file_path: Optional[DPPath] = None, ): if stat_file_path is not None: diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index 0fe817084e..37c802613a 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -165,7 +165,7 @@ def output_def(self) -> FittingOutputDef: def compute_output_stats( self, - merged: Union[Callable, List[dict]], + merged: Union[Callable[[], List[dict]], List[dict]], stat_file_path: Optional[DPPath] = None, ): raise NotImplementedError From 6020a2b33d71cebdd6c9ea2d3fef77d4a9b4d6b7 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 15:37:19 +0800 Subject: [PATCH 18/37] Fix nopbc --- deepmd/pt/utils/stat.py | 8 ++++++-- deepmd/utils/data.py | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 4dbb633de5..3b246a0ec2 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -34,12 +34,16 @@ def make_stat_input(datasets, dataloaders, nbatches): iterator = iter(dataloaders[i]) stat_data = next(iterator) for dd in stat_data: - if isinstance(stat_data[dd], torch.Tensor): + if stat_data[dd] is None: + sys_stat[dd] = None + elif isinstance(stat_data[dd], torch.Tensor): if dd not in sys_stat: sys_stat[dd] = [] sys_stat[dd].append(stat_data[dd]) + else: + pass for key in sys_stat: - if sys_stat[key][0] is None: + if sys_stat[key] is None or sys_stat[key][0] is None: sys_stat[key] = None else: sys_stat[key] = torch.cat(sys_stat[key], dim=0) diff --git a/deepmd/utils/data.py 
b/deepmd/utils/data.py index 6e0c47881f..9e726fbe19 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -490,6 +490,8 @@ def reformat_data_torch(self, data): if self.data_dict[kk]["atomic"]: data[kk] = data[kk].reshape(-1, self.data_dict[kk]["ndof"]) data["atype"] = data["type"] + if not self.pbc: + data["box"] = None return data def _load_set(self, set_name: DPPath): From 5db7883457df7d7cdc7523a7354d6fb4a5432cfb Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 16:13:17 +0800 Subject: [PATCH 19/37] Add DataRequirementItem --- deepmd/dpmodel/model/base_model.py | 5 +- deepmd/dpmodel/model/dp_model.py | 9 +++- deepmd/pt/model/model/dipole_model.py | 41 ++++++++------ deepmd/pt/model/model/dp_zbl_model.py | 77 ++++++++++++++------------ deepmd/pt/model/model/ener_model.py | 78 +++++++++++++++------------ deepmd/pt/model/model/model.py | 6 ++- deepmd/pt/model/model/polar_model.py | 41 ++++++++------ deepmd/pt/utils/dataloader.py | 7 ++- deepmd/pt/utils/dataset.py | 27 ++++++---- deepmd/utils/data.py | 70 ++++++++++++++++++++++++ 10 files changed, 243 insertions(+), 118 deletions(-) diff --git a/deepmd/dpmodel/model/base_model.py b/deepmd/dpmodel/model/base_model.py index c4b998d763..ee22dec132 100644 --- a/deepmd/dpmodel/model/base_model.py +++ b/deepmd/dpmodel/model/base_model.py @@ -10,6 +10,9 @@ Type, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from deepmd.utils.plugin import ( PluginVariant, make_plugin_registry, @@ -93,7 +96,7 @@ def model_output_type(self) -> str: """Get the output type for the model.""" @abstractmethod - def data_requirement(self) -> dict: + def data_requirement(self) -> List[DataRequirementItem]: """Get the data requirement for the model.""" @abstractmethod diff --git a/deepmd/dpmodel/model/dp_model.py b/deepmd/dpmodel/model/dp_model.py index 705750414b..88243c8742 100644 --- a/deepmd/dpmodel/model/dp_model.py +++ b/deepmd/dpmodel/model/dp_model.py @@ -1,10 +1,17 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, +) + from deepmd.dpmodel.atomic_model import ( DPAtomicModel, ) from deepmd.dpmodel.model.base_model import ( BaseModel, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from .make_model import ( make_model, @@ -14,6 +21,6 @@ # use "class" to resolve "Variable not allowed in type expression" @BaseModel.register("standard") class DPModel(make_model(DPAtomicModel), BaseModel): - def data_requirement(self) -> dict: + def data_requirement(self) -> List[DataRequirementItem]: """Get the data requirement for the model.""" raise NotImplementedError diff --git a/deepmd/pt/model/model/dipole_model.py b/deepmd/pt/model/model/dipole_model.py index f6d896b5d8..106202d00c 100644 --- a/deepmd/pt/model/model/dipole_model.py +++ b/deepmd/pt/model/model/dipole_model.py @@ -1,11 +1,16 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Dict, + List, Optional, ) import torch +from deepmd.utils.data import ( + DataRequirementItem, +) + from .dp_model import ( DPModel, ) @@ -92,21 +97,23 @@ def forward_lower( return model_predict @property - def data_requirement(self): - data_requirement = { - "dipole": { - "ndof": 3, - "atomic": False, - "must": False, - "high_prec": False, - "type_sel": self.get_sel_type(), - }, - "atomic_dipole": { - "ndof": 3, - "atomic": True, - "must": False, - "high_prec": False, - "type_sel": self.get_sel_type(), - }, - } + def data_requirement(self) -> List[DataRequirementItem]: + data_requirement = [ + 
DataRequirementItem( + "dipole", + ndof=3, + atomic=False, + must=False, + high_prec=False, + type_sel=self.get_sel_type(), + ), + DataRequirementItem( + "atomic_dipole", + ndof=3, + atomic=True, + must=False, + high_prec=False, + type_sel=self.get_sel_type(), + ), + ] return data_requirement diff --git a/deepmd/pt/model/model/dp_zbl_model.py b/deepmd/pt/model/model/dp_zbl_model.py index fd47b4368d..fed9d89bf5 100644 --- a/deepmd/pt/model/model/dp_zbl_model.py +++ b/deepmd/pt/model/model/dp_zbl_model.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Dict, + List, Optional, ) @@ -12,6 +13,9 @@ from deepmd.pt.model.model.model import ( BaseModel, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from .make_model import ( make_model, @@ -99,38 +103,43 @@ def forward_lower( return model_predict @property - def data_requirement(self): - data_requirement = { - "energy": { - "ndof": 1, - "atomic": False, - "must": False, - "high_prec": True, - }, - "force": { - "ndof": 3, - "atomic": True, - "must": False, - "high_prec": False, - }, - "virial": { - "ndof": 9, - "atomic": False, - "must": False, - "high_prec": False, - }, - "atom_ener": { - "ndof": 1, - "atomic": True, - "must": False, - "high_prec": False, - }, - "atom_pref": { - "ndof": 1, - "atomic": True, - "must": False, - "high_prec": False, - "repeat": 3, - }, - } + def data_requirement(self) -> List[DataRequirementItem]: + data_requirement = [ + DataRequirementItem( + "energy", + ndof=1, + atomic=False, + must=False, + high_prec=True, + ), + DataRequirementItem( + "force", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ), + DataRequirementItem( + "virial", + ndof=9, + atomic=False, + must=False, + high_prec=False, + ), + DataRequirementItem( + "atom_ener", + ndof=1, + atomic=True, + must=False, + high_prec=False, + ), + DataRequirementItem( + "atom_pref", + ndof=1, + atomic=True, + must=False, + high_prec=False, + repeat=3, + ), + ] return data_requirement diff --git a/deepmd/pt/model/model/ener_model.py b/deepmd/pt/model/model/ener_model.py index 1497cbade4..92b2b95e34 100644 --- a/deepmd/pt/model/model/ener_model.py +++ b/deepmd/pt/model/model/ener_model.py @@ -1,11 +1,16 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Dict, + List, Optional, ) import torch +from deepmd.utils.data import ( + DataRequirementItem, +) + from .dp_model import ( DPModel, ) @@ -97,38 +102,43 @@ def forward_lower( return model_predict @property - def data_requirement(self): - data_requirement = { - "energy": { - "ndof": 1, - "atomic": False, - "must": False, - "high_prec": True, - }, - "force": { - "ndof": 3, - "atomic": True, - "must": False, - "high_prec": False, - }, - "virial": { - "ndof": 9, - "atomic": False, - "must": False, - "high_prec": False, - }, - "atom_ener": { - "ndof": 1, - "atomic": True, - "must": False, - "high_prec": False, - }, - "atom_pref": { - "ndof": 1, - "atomic": True, - "must": False, - "high_prec": False, - "repeat": 3, - }, - } + def data_requirement(self) -> List[DataRequirementItem]: + data_requirement = [ + DataRequirementItem( + "energy", + ndof=1, + atomic=False, + must=False, + high_prec=True, + ), + DataRequirementItem( + "force", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ), + DataRequirementItem( + "virial", + ndof=9, + atomic=False, + must=False, + high_prec=False, + ), + DataRequirementItem( + "atom_ener", + ndof=1, + atomic=True, + must=False, + high_prec=False, + ), + DataRequirementItem( + "atom_pref", + ndof=1, + 
atomic=True, + must=False, + high_prec=False, + repeat=3, + ), + ] return data_requirement diff --git a/deepmd/pt/model/model/model.py b/deepmd/pt/model/model/model.py index 0e2afadd14..1b82402747 100644 --- a/deepmd/pt/model/model/model.py +++ b/deepmd/pt/model/model/model.py @@ -1,11 +1,15 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + List, Optional, ) from deepmd.dpmodel.model.base_model import ( make_base_model, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from deepmd.utils.path import ( DPPath, ) @@ -85,6 +89,6 @@ def compute_or_load_stat( """ raise NotImplementedError - def data_requirement(self) -> dict: + def data_requirement(self) -> List[DataRequirementItem]: """Get the data requirement for the model.""" raise NotImplementedError diff --git a/deepmd/pt/model/model/polar_model.py b/deepmd/pt/model/model/polar_model.py index 450f5f2fb5..c23e26afac 100644 --- a/deepmd/pt/model/model/polar_model.py +++ b/deepmd/pt/model/model/polar_model.py @@ -1,11 +1,16 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Dict, + List, Optional, ) import torch +from deepmd.utils.data import ( + DataRequirementItem, +) + from .dp_model import ( DPModel, ) @@ -76,21 +81,23 @@ def forward_lower( return model_predict @property - def get_data_requirement(self): - data_requirement = { - "polar": { - "ndof": 9, - "atomic": False, - "must": False, - "high_prec": False, - "type_sel": self.get_sel_type(), - }, - "atomic_polar": { - "ndof": 9, - "atomic": True, - "must": False, - "high_prec": False, - "type_sel": self.get_sel_type(), - }, - } + def get_data_requirement(self) -> List[DataRequirementItem]: + data_requirement = [ + DataRequirementItem( + "polar", + ndof=9, + atomic=False, + must=False, + high_prec=False, + type_sel=self.get_sel_type(), + ), + DataRequirementItem( + "atomic_polar", + ndof=9, + atomic=True, + must=False, + high_prec=False, + type_sel=self.get_sel_type(), + ), + ] return data_requirement diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 70993c21a0..b197f46124 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -35,6 +35,9 @@ from deepmd.pt.utils.dataset import ( DeepmdDataSetForLoader, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from deepmd.utils.data_system import ( prob_sys_size_ext, process_sys_probs, @@ -147,10 +150,10 @@ def __getitem__(self, idx): batch["sid"] = idx return batch - def add_data_requirement(self, dict_of_keys): + def add_data_requirement(self, data_requirement: List[DataRequirementItem]): """Add data requirement for each system in multiple systems.""" for system in self.systems: - system.add_data_requirement(dict_of_keys) + system.add_data_requirement(data_requirement) _sentinel = object() diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index 9de82778dc..40a513acdf 100644 --- a/deepmd/pt/utils/dataset.py +++ b/deepmd/pt/utils/dataset.py @@ -1,11 +1,16 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, +) + from torch.utils.data import ( Dataset, ) from deepmd.utils.data import ( + DataRequirementItem, DeepmdData, ) @@ -42,17 +47,17 @@ def __getitem__(self, index): b_data["natoms"] = self._natoms_vec return b_data - def add_data_requirement(self, dict_of_keys): + def add_data_requirement(self, data_requirement: List[DataRequirementItem]): """Add data requirement for this data system.""" - for data_key in dict_of_keys: + for data_item in data_requirement: self._data_system.add( - 
data_key, - dict_of_keys[data_key]["ndof"], - atomic=dict_of_keys[data_key].get("atomic", False), - must=dict_of_keys[data_key].get("must", False), - high_prec=dict_of_keys[data_key].get("high_prec", False), - type_sel=dict_of_keys[data_key].get("type_sel", None), - repeat=dict_of_keys[data_key].get("repeat", 1), - default=dict_of_keys[data_key].get("default", 0.0), - dtype=dict_of_keys[data_key].get("dtype", None), + data_item["key"], + data_item["ndof"], + atomic=data_item["atomic"], + must=data_item["must"], + high_prec=data_item["high_prec"], + type_sel=data_item["type_sel"], + repeat=data_item["repeat"], + default=data_item["default"], + dtype=data_item["dtype"], ) diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index 9e726fbe19..03e39e1f21 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -666,3 +666,73 @@ def _check_pbc(self, sys_path: DPPath): def _check_mode(self, set_path: DPPath): return (set_path / "real_atom_types.npy").is_file() + + +class DataRequirementItem: + """A class to store the data requirement for data systems. + + Parameters + ---------- + key + The key of the item. The corresponding data is stored in `sys_path/set.*/key.npy` + ndof + The number of dof + atomic + The item is an atomic property. + If False, the size of the data should be nframes x ndof + If True, the size of data should be nframes x natoms x ndof + must + The data file `sys_path/set.*/key.npy` must exist. + If must is False and the data file does not exist, the `data_dict[find_key]` is set to 0.0 + high_prec + Load the data and store in float64, otherwise in float32 + type_sel + Select certain type of atoms + repeat + The data will be repeated `repeat` times. + default : float, default=0. + default value of data + dtype : np.dtype, optional + the dtype of data, overwrites `high_prec` if provided + """ + + def __init__( + self, + key: str, + ndof: int, + atomic: bool = False, + must: bool = False, + high_prec: bool = False, + type_sel: Optional[List[int]] = None, + repeat: int = 1, + default: float = 0.0, + dtype: Optional[np.dtype] = None, + ) -> None: + self.key = key + self.ndof = ndof + self.atomic = atomic + self.must = must + self.high_prec = high_prec + self.type_sel = type_sel + self.repeat = repeat + self.default = default + self.dtype = dtype + self.dict = self.to_dict() + + def to_dict(self) -> dict: + return { + "key": self.key, + "ndof": self.ndof, + "atomic": self.atomic, + "must": self.must, + "high_prec": self.high_prec, + "type_sel": self.type_sel, + "repeat": self.repeat, + "default": self.default, + "dtype": self.dtype, + } + + def __getitem__(self, key: str): + if key not in self.dict: + raise KeyError(key) + return self.dict[key] From cce52da575cb21da92e735b7dc93cf8f86134fb6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 29 Feb 2024 08:16:14 +0000 Subject: [PATCH 20/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/dpmodel/model/dp_model.py | 2 +- deepmd/pt/entrypoints/main.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/deepmd/dpmodel/model/dp_model.py b/deepmd/dpmodel/model/dp_model.py index 15b3ca2765..d4706cb3be 100644 --- a/deepmd/dpmodel/model/dp_model.py +++ b/deepmd/dpmodel/model/dp_model.py @@ -27,7 +27,7 @@ class DPModel(make_model(DPAtomicModel), BaseModel): def data_requirement(self) -> List[DataRequirementItem]: """Get the data requirement for the model.""" raise NotImplementedError - + 
@classmethod def update_sel(cls, global_jdata: dict, local_jdata: dict): """Update the selection and perform neighbor statistics. diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index ce8ace06d5..ab35e32012 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -50,9 +50,6 @@ from deepmd.pt.utils.multi_task import ( preprocess_shared_params, ) -from deepmd.pt.utils.stat import ( - make_stat_input, -) from deepmd.utils.argcheck import ( normalize, ) From cdcfcb2fac35739b47f0f196393e922645fd52ca Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 17:29:38 +0800 Subject: [PATCH 21/37] Fix neighbor-stat for multitask (#31) --- deepmd/pt/entrypoints/main.py | 36 +++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index ab35e32012..844061d0ef 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -3,6 +3,9 @@ import json import logging import os +from copy import ( + deepcopy, +) from pathlib import ( Path, ) @@ -72,9 +75,11 @@ def get_trainer( model_branch="", force_load=False, init_frz_model=None, + shared_links=None, ): + multi_task = "model_dict" in config.get("model", {}) # argcheck - if "model_dict" not in config.get("model", {}): + if not multi_task: config = update_deepmd_input(config, warning=True, dump="input_v2_compat.json") config = normalize(config) @@ -85,7 +90,6 @@ def get_trainer( assert dist.is_nccl_available() dist.init_process_group(backend="nccl") - multi_task = "model_dict" in config["model"] ckpt = init_model if init_model is not None else restart_model config["model"] = change_finetune_model_params( ckpt, @@ -94,9 +98,6 @@ def get_trainer( multi_task=multi_task, model_branch=model_branch, ) - shared_links = None - if multi_task: - config["model"], shared_links = preprocess_shared_params(config["model"]) def prepare_trainer_input_single( model_params_single, data_dict_single, loss_dict_single, suffix="" @@ -220,11 +221,33 @@ def train(FLAGS): SummaryPrinter()() with open(FLAGS.INPUT) as fin: config = json.load(fin) + + # update multitask config + multi_task = "model_dict" in config["model"] + shared_links = None + if multi_task: + config["model"], shared_links = preprocess_shared_params(config["model"]) + + # do neighbor stat if not FLAGS.skip_neighbor_stat: log.info( "Calculate neighbor statistics... 
(add --skip-neighbor-stat to skip this step)" ) - config["model"] = BaseModel.update_sel(config, config["model"]) + if not multi_task: + config["model"] = BaseModel.update_sel(config, config["model"]) + else: + training_jdata = deepcopy(config["training"]) + training_jdata.pop("data_dict", {}) + training_jdata.pop("model_prob", {}) + for model_item in config["model"]["model_dict"]: + fake_global_jdata = { + "model": deepcopy(config["model"]["model_dict"][model_item]), + "training": deepcopy(config["training"]["data_dict"][model_item]), + } + fake_global_jdata["training"].update(training_jdata) + config["model"]["model_dict"][model_item] = BaseModel.update_sel( + fake_global_jdata, config["model"]["model_dict"][model_item] + ) trainer = get_trainer( config, @@ -234,6 +257,7 @@ def train(FLAGS): FLAGS.model_branch, FLAGS.force_load, FLAGS.init_frz_model, + shared_links=shared_links, ) trainer.run() From a7d44d1c2283bcc4a096b1f0eb5eaf6241078ef1 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 17:35:52 +0800 Subject: [PATCH 22/37] Revert "Fix neighbor-stat for multitask (#31)" This reverts commit cdcfcb2fac35739b47f0f196393e922645fd52ca. --- deepmd/pt/entrypoints/main.py | 36 ++++++----------------------------- 1 file changed, 6 insertions(+), 30 deletions(-) diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index 844061d0ef..ab35e32012 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -3,9 +3,6 @@ import json import logging import os -from copy import ( - deepcopy, -) from pathlib import ( Path, ) @@ -75,11 +72,9 @@ def get_trainer( model_branch="", force_load=False, init_frz_model=None, - shared_links=None, ): - multi_task = "model_dict" in config.get("model", {}) # argcheck - if not multi_task: + if "model_dict" not in config.get("model", {}): config = update_deepmd_input(config, warning=True, dump="input_v2_compat.json") config = normalize(config) @@ -90,6 +85,7 @@ def get_trainer( assert dist.is_nccl_available() dist.init_process_group(backend="nccl") + multi_task = "model_dict" in config["model"] ckpt = init_model if init_model is not None else restart_model config["model"] = change_finetune_model_params( ckpt, @@ -98,6 +94,9 @@ def get_trainer( multi_task=multi_task, model_branch=model_branch, ) + shared_links = None + if multi_task: + config["model"], shared_links = preprocess_shared_params(config["model"]) def prepare_trainer_input_single( model_params_single, data_dict_single, loss_dict_single, suffix="" @@ -221,33 +220,11 @@ def train(FLAGS): SummaryPrinter()() with open(FLAGS.INPUT) as fin: config = json.load(fin) - - # update multitask config - multi_task = "model_dict" in config["model"] - shared_links = None - if multi_task: - config["model"], shared_links = preprocess_shared_params(config["model"]) - - # do neighbor stat if not FLAGS.skip_neighbor_stat: log.info( "Calculate neighbor statistics... 
(add --skip-neighbor-stat to skip this step)" ) - if not multi_task: - config["model"] = BaseModel.update_sel(config, config["model"]) - else: - training_jdata = deepcopy(config["training"]) - training_jdata.pop("data_dict", {}) - training_jdata.pop("model_prob", {}) - for model_item in config["model"]["model_dict"]: - fake_global_jdata = { - "model": deepcopy(config["model"]["model_dict"][model_item]), - "training": deepcopy(config["training"]["data_dict"][model_item]), - } - fake_global_jdata["training"].update(training_jdata) - config["model"]["model_dict"][model_item] = BaseModel.update_sel( - fake_global_jdata, config["model"]["model_dict"][model_item] - ) + config["model"] = BaseModel.update_sel(config, config["model"]) trainer = get_trainer( config, @@ -257,7 +234,6 @@ def train(FLAGS): FLAGS.model_branch, FLAGS.force_load, FLAGS.init_frz_model, - shared_links=shared_links, ) trainer.run() From fdca653a42c9cc0e2e8490d4dfe476416590f80c Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 18:07:58 +0800 Subject: [PATCH 23/37] Move label requirement to loss func --- deepmd/dpmodel/model/base_model.py | 7 --- deepmd/dpmodel/model/dp_model.py | 10 ---- deepmd/pt/loss/ener.py | 50 ++++++++++++++++++ deepmd/pt/loss/loss.py | 13 +++++ deepmd/pt/model/model/dipole_model.py | 27 ---------- deepmd/pt/model/model/dp_zbl_model.py | 46 ----------------- deepmd/pt/model/model/ener_model.py | 47 ----------------- deepmd/pt/model/model/model.py | 8 --- deepmd/pt/model/model/polar_model.py | 27 ---------- deepmd/pt/train/training.py | 53 +++++++++++-------- source/tests/pt/test_stat.py | 74 +++++++++++++++------------ 11 files changed, 135 insertions(+), 227 deletions(-) diff --git a/deepmd/dpmodel/model/base_model.py b/deepmd/dpmodel/model/base_model.py index ec23994575..e7cc8d9272 100644 --- a/deepmd/dpmodel/model/base_model.py +++ b/deepmd/dpmodel/model/base_model.py @@ -10,9 +10,6 @@ Type, ) -from deepmd.utils.data import ( - DataRequirementItem, -) from deepmd.utils.plugin import ( PluginVariant, make_plugin_registry, @@ -95,10 +92,6 @@ def is_aparam_nall(self) -> bool: def model_output_type(self) -> str: """Get the output type for the model.""" - @abstractmethod - def data_requirement(self) -> List[DataRequirementItem]: - """Get the data requirement for the model.""" - @abstractmethod def serialize(self) -> dict: """Serialize the model. diff --git a/deepmd/dpmodel/model/dp_model.py b/deepmd/dpmodel/model/dp_model.py index d4706cb3be..15f9027d4c 100644 --- a/deepmd/dpmodel/model/dp_model.py +++ b/deepmd/dpmodel/model/dp_model.py @@ -1,7 +1,4 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - List, -) from deepmd.dpmodel.atomic_model import ( DPAtomicModel, @@ -12,9 +9,6 @@ from deepmd.dpmodel.model.base_model import ( BaseModel, ) -from deepmd.utils.data import ( - DataRequirementItem, -) from .make_model import ( make_model, @@ -24,10 +18,6 @@ # use "class" to resolve "Variable not allowed in type expression" @BaseModel.register("standard") class DPModel(make_model(DPAtomicModel), BaseModel): - def data_requirement(self) -> List[DataRequirementItem]: - """Get the data requirement for the model.""" - raise NotImplementedError - @classmethod def update_sel(cls, global_jdata: dict, local_jdata: dict): """Update the selection and perform neighbor statistics. 
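Under the contract introduced by this patch, each loss declares the labels it consumes and the trainer forwards them to the data systems, as the diffs below show for EnergyStdLoss. A minimal sketch of a custom loss written against that contract (the class name and the single energy item are illustrative, not part of the patch):

    from typing import List

    from deepmd.pt.loss.loss import TaskLoss
    from deepmd.utils.data import DataRequirementItem


    class MyEnergyLoss(TaskLoss):
        @property
        def label_requirement(self) -> List[DataRequirementItem]:
            # one scalar energy label per frame, loaded in float64
            return [
                DataRequirementItem(
                    "energy", ndof=1, atomic=False, must=False, high_prec=True
                )
            ]


    loss = MyEnergyLoss()
    assert loss.label_requirement[0]["key"] == "energy"

On the trainer side only the loss is consulted, roughly as training_data.add_data_requirement(loss.label_requirement), with no model involvement.
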
diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py index 4ed765cf69..648e954401 100644 --- a/deepmd/pt/loss/ener.py +++ b/deepmd/pt/loss/ener.py @@ -1,4 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, +) + import torch import torch.nn.functional as F @@ -11,6 +15,9 @@ from deepmd.pt.utils.env import ( GLOBAL_PT_FLOAT_PRECISION, ) +from deepmd.utils.data import ( + DataRequirementItem, +) class EnergyStdLoss(TaskLoss): @@ -153,3 +160,46 @@ def forward(self, model_pred, label, natoms, learning_rate, mae=False): if not self.inference: more_loss["rmse"] = torch.sqrt(loss.detach()) return loss, more_loss + + @property + def label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + data_requirement = [ + DataRequirementItem( + "energy", + ndof=1, + atomic=False, + must=False, + high_prec=True, + ), + DataRequirementItem( + "force", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ), + DataRequirementItem( + "virial", + ndof=9, + atomic=False, + must=False, + high_prec=False, + ), + DataRequirementItem( + "atom_ener", + ndof=1, + atomic=True, + must=False, + high_prec=False, + ), + DataRequirementItem( + "atom_pref", + ndof=1, + atomic=True, + must=False, + high_prec=False, + repeat=3, + ), + ] + return data_requirement diff --git a/deepmd/pt/loss/loss.py b/deepmd/pt/loss/loss.py index 9f2c3a7ed7..7059d76e03 100644 --- a/deepmd/pt/loss/loss.py +++ b/deepmd/pt/loss/loss.py @@ -1,6 +1,14 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, +) + import torch +from deepmd.utils.data import ( + DataRequirementItem, +) + class TaskLoss(torch.nn.Module): def __init__(self, **kwargs): @@ -10,3 +18,8 @@ def __init__(self, **kwargs): def forward(self, model_pred, label, natoms, learning_rate): """Return loss .""" raise NotImplementedError + + @property + def label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + raise NotImplementedError diff --git a/deepmd/pt/model/model/dipole_model.py b/deepmd/pt/model/model/dipole_model.py index 106202d00c..6629541459 100644 --- a/deepmd/pt/model/model/dipole_model.py +++ b/deepmd/pt/model/model/dipole_model.py @@ -1,16 +1,11 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Dict, - List, Optional, ) import torch -from deepmd.utils.data import ( - DataRequirementItem, -) - from .dp_model import ( DPModel, ) @@ -95,25 +90,3 @@ def forward_lower( else: model_predict = model_ret return model_predict - - @property - def data_requirement(self) -> List[DataRequirementItem]: - data_requirement = [ - DataRequirementItem( - "dipole", - ndof=3, - atomic=False, - must=False, - high_prec=False, - type_sel=self.get_sel_type(), - ), - DataRequirementItem( - "atomic_dipole", - ndof=3, - atomic=True, - must=False, - high_prec=False, - type_sel=self.get_sel_type(), - ), - ] - return data_requirement diff --git a/deepmd/pt/model/model/dp_zbl_model.py b/deepmd/pt/model/model/dp_zbl_model.py index c8aade5eec..f2af0fff52 100644 --- a/deepmd/pt/model/model/dp_zbl_model.py +++ b/deepmd/pt/model/model/dp_zbl_model.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Dict, - List, Optional, ) @@ -16,9 +15,6 @@ from deepmd.pt.model.model.model import ( BaseModel, ) -from deepmd.utils.data import ( - DataRequirementItem, -) from .make_model import ( make_model, @@ -105,48 +101,6 @@ def forward_lower( model_predict = 
model_ret return model_predict - @property - def data_requirement(self) -> List[DataRequirementItem]: - data_requirement = [ - DataRequirementItem( - "energy", - ndof=1, - atomic=False, - must=False, - high_prec=True, - ), - DataRequirementItem( - "force", - ndof=3, - atomic=True, - must=False, - high_prec=False, - ), - DataRequirementItem( - "virial", - ndof=9, - atomic=False, - must=False, - high_prec=False, - ), - DataRequirementItem( - "atom_ener", - ndof=1, - atomic=True, - must=False, - high_prec=False, - ), - DataRequirementItem( - "atom_pref", - ndof=1, - atomic=True, - must=False, - high_prec=False, - repeat=3, - ), - ] - return data_requirement - @classmethod def update_sel(cls, global_jdata: dict, local_jdata: dict): """Update the selection and perform neighbor statistics. diff --git a/deepmd/pt/model/model/ener_model.py b/deepmd/pt/model/model/ener_model.py index 92b2b95e34..1a5706dbbf 100644 --- a/deepmd/pt/model/model/ener_model.py +++ b/deepmd/pt/model/model/ener_model.py @@ -1,16 +1,11 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Dict, - List, Optional, ) import torch -from deepmd.utils.data import ( - DataRequirementItem, -) - from .dp_model import ( DPModel, ) @@ -100,45 +95,3 @@ def forward_lower( else: model_predict = model_ret return model_predict - - @property - def data_requirement(self) -> List[DataRequirementItem]: - data_requirement = [ - DataRequirementItem( - "energy", - ndof=1, - atomic=False, - must=False, - high_prec=True, - ), - DataRequirementItem( - "force", - ndof=3, - atomic=True, - must=False, - high_prec=False, - ), - DataRequirementItem( - "virial", - ndof=9, - atomic=False, - must=False, - high_prec=False, - ), - DataRequirementItem( - "atom_ener", - ndof=1, - atomic=True, - must=False, - high_prec=False, - ), - DataRequirementItem( - "atom_pref", - ndof=1, - atomic=True, - must=False, - high_prec=False, - repeat=3, - ), - ] - return data_requirement diff --git a/deepmd/pt/model/model/model.py b/deepmd/pt/model/model/model.py index 1b82402747..e32d2f307d 100644 --- a/deepmd/pt/model/model/model.py +++ b/deepmd/pt/model/model/model.py @@ -1,15 +1,11 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - List, Optional, ) from deepmd.dpmodel.model.base_model import ( make_base_model, ) -from deepmd.utils.data import ( - DataRequirementItem, -) from deepmd.utils.path import ( DPPath, ) @@ -88,7 +84,3 @@ def compute_or_load_stat( The path to the statistics files. 
""" raise NotImplementedError - - def data_requirement(self) -> List[DataRequirementItem]: - """Get the data requirement for the model.""" - raise NotImplementedError diff --git a/deepmd/pt/model/model/polar_model.py b/deepmd/pt/model/model/polar_model.py index c23e26afac..d956a0344c 100644 --- a/deepmd/pt/model/model/polar_model.py +++ b/deepmd/pt/model/model/polar_model.py @@ -1,16 +1,11 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Dict, - List, Optional, ) import torch -from deepmd.utils.data import ( - DataRequirementItem, -) - from .dp_model import ( DPModel, ) @@ -79,25 +74,3 @@ def forward_lower( else: model_predict = model_ret return model_predict - - @property - def get_data_requirement(self) -> List[DataRequirementItem]: - data_requirement = [ - DataRequirementItem( - "polar", - ndof=9, - atomic=False, - must=False, - high_prec=False, - type_sel=self.get_sel_type(), - ), - DataRequirementItem( - "atomic_polar", - ndof=9, - atomic=True, - must=False, - high_prec=False, - type_sel=self.get_sel_type(), - ), - ] - return data_requirement diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 6b62282817..21a285f540 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -202,12 +202,16 @@ def get_data_loader(_training_data, _validation_data, _training_params): ) def get_single_model( - _model_params, _training_data, _validation_data, _stat_file_path + _model_params, + _training_data, + _validation_data, + _stat_file_path, + _data_requirement, ): model = get_model(deepcopy(_model_params)).to(DEVICE) - _training_data.add_data_requirement(model.data_requirement) + _training_data.add_data_requirement(_data_requirement) if _validation_data is not None: - _validation_data.add_data_requirement(model.data_requirement) + _validation_data.add_data_requirement(_data_requirement) if not resuming: @lazy @@ -262,11 +266,33 @@ def get_loss(loss_params, start_lr, _ntypes): else: self.opt_type, self.opt_param = get_opt_param(training_params) + # Loss + if not self.multi_task: + self.loss = get_loss( + config["loss"], + config["learning_rate"]["start_lr"], + len(model_params["type_map"]), + ) + else: + self.loss = {} + for model_key in self.model_keys: + loss_param = config["loss_dict"][model_key] + if config.get("learning_rate_dict", None) is not None: + lr_param = config["learning_rate_dict"][model_key]["start_lr"] + else: + lr_param = config["learning_rate"]["start_lr"] + ntypes = len(model_params["model_dict"][model_key]["type_map"]) + self.loss[model_key] = get_loss(loss_param, lr_param, ntypes) + # Data + Model dp_random.seed(training_params["seed"]) if not self.multi_task: self.model = get_single_model( - model_params, training_data, validation_data, stat_file_path + model_params, + training_data, + validation_data, + stat_file_path, + self.loss.label_requirement, ) ( self.training_dataloader, @@ -290,6 +316,7 @@ def get_loss(loss_params, start_lr, _ntypes): training_data[model_key], validation_data[model_key], stat_file_path[model_key], + self.loss[model_key].label_requirement, ) ( self.training_dataloader[model_key], @@ -316,24 +343,6 @@ def get_loss(loss_params, start_lr, _ntypes): else: self.lr_exp = get_lr(config["learning_rate"]) - # Loss - if not self.multi_task: - self.loss = get_loss( - config["loss"], - config["learning_rate"]["start_lr"], - len(model_params["type_map"]), - ) - else: - self.loss = {} - for model_key in self.model_keys: - loss_param = config["loss_dict"][model_key] - if config.get("learning_rate_dict", 
None) is not None: - lr_param = config["learning_rate_dict"][model_key]["start_lr"] - else: - lr_param = config["learning_rate"]["start_lr"] - ntypes = len(model_params["model_dict"][model_key]["type_map"]) - self.loss[model_key] = get_loss(loss_param, lr_param, ntypes) - # JIT if JIT: self.model = torch.jit.script(self.model) diff --git a/source/tests/pt/test_stat.py b/source/tests/pt/test_stat.py index 54810fcc8f..3a09f82baf 100644 --- a/source/tests/pt/test_stat.py +++ b/source/tests/pt/test_stat.py @@ -44,42 +44,50 @@ from deepmd.tf.utils.data_system import ( DeepmdDataSystem, ) +from deepmd.utils.data import ( + DataRequirementItem, +) CUR_DIR = os.path.dirname(__file__) -energy_data_requirement = { - "energy": { - "ndof": 1, - "atomic": False, - "must": False, - "high_prec": True, - }, - "force": { - "ndof": 3, - "atomic": True, - "must": False, - "high_prec": False, - }, - "virial": { - "ndof": 9, - "atomic": False, - "must": False, - "high_prec": False, - }, - "atom_ener": { - "ndof": 1, - "atomic": True, - "must": False, - "high_prec": False, - }, - "atom_pref": { - "ndof": 1, - "atomic": True, - "must": False, - "high_prec": False, - "repeat": 3, - }, -} +energy_data_requirement = [ + DataRequirementItem( + "energy", + ndof=1, + atomic=False, + must=False, + high_prec=True, + ), + DataRequirementItem( + "force", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ), + DataRequirementItem( + "virial", + ndof=9, + atomic=False, + must=False, + high_prec=False, + ), + DataRequirementItem( + "atom_ener", + ndof=1, + atomic=True, + must=False, + high_prec=False, + ), + DataRequirementItem( + "atom_pref", + ndof=1, + atomic=True, + must=False, + high_prec=False, + repeat=3, + ), +] def compare(ut, base, given): From 525ce93cc97dd8f3bbf56dd1539974dd73114cc8 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 18:38:43 +0800 Subject: [PATCH 24/37] resolve conversations --- deepmd/dpmodel/descriptor/make_base_descriptor.py | 6 +++++- deepmd/dpmodel/descriptor/se_e2_a.py | 6 +++++- deepmd/dpmodel/descriptor/se_r.py | 6 +++++- deepmd/pt/model/descriptor/descriptor.py | 5 +++++ deepmd/pt/model/descriptor/dpa1.py | 5 +++++ deepmd/pt/model/descriptor/dpa2.py | 5 +++++ deepmd/pt/model/descriptor/hybrid.py | 5 +++++ deepmd/pt/model/descriptor/se_a.py | 5 +++++ deepmd/pt/model/descriptor/se_r.py | 5 +++++ deepmd/pt/model/network/network.py | 5 +++++ deepmd/pt/model/task/dipole.py | 2 +- deepmd/pt/model/task/fitting.py | 5 +++++ deepmd/pt/model/task/polarizability.py | 2 +- deepmd/pt/train/wrapper.py | 5 +++++ 14 files changed, 62 insertions(+), 5 deletions(-) diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py b/deepmd/dpmodel/descriptor/make_base_descriptor.py index 11db208077..940bd0cd27 100644 --- a/deepmd/dpmodel/descriptor/make_base_descriptor.py +++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py @@ -88,7 +88,11 @@ def mixed_types(self) -> bool: @abstractmethod def share_params(self, base_class, shared_level, resume=False): - """Share the parameters of self to the base_class with shared_level.""" + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. 
+ """ pass def compute_input_stats( diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py index 14010c34e2..f6b1c5677e 100644 --- a/deepmd/dpmodel/descriptor/se_e2_a.py +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -244,7 +244,11 @@ def mixed_types(self): return False def share_params(self, base_class, shared_level, resume=False): - """Share the parameters of self to the base_class with shared_level.""" + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ raise NotImplementedError def get_ntypes(self) -> int: diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py index feea008478..fda8b19474 100644 --- a/deepmd/dpmodel/descriptor/se_r.py +++ b/deepmd/dpmodel/descriptor/se_r.py @@ -204,7 +204,11 @@ def mixed_types(self): return False def share_params(self, base_class, shared_level, resume=False): - """Share the parameters of self to the base_class with shared_level.""" + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ raise NotImplementedError def get_ntypes(self) -> int: diff --git a/deepmd/pt/model/descriptor/descriptor.py b/deepmd/pt/model/descriptor/descriptor.py index 778523a14d..339a716942 100644 --- a/deepmd/pt/model/descriptor/descriptor.py +++ b/deepmd/pt/model/descriptor/descriptor.py @@ -101,6 +101,11 @@ def get_stats(self) -> Dict[str, StatItem]: raise NotImplementedError def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ assert ( self.__class__ == base_class.__class__ ), "Only descriptors of the same type can share params!" diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index 08c37487de..ddb1d0ea05 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -148,6 +148,11 @@ def mixed_types(self) -> bool: return self.se_atten.mixed_types() def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ assert ( self.__class__ == base_class.__class__ ), "Only descriptors of the same type can share params!" diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index d407452e46..3a4319860f 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -292,6 +292,11 @@ def mixed_types(self) -> bool: return True def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. 
+ """ assert ( self.__class__ == base_class.__class__ ), "Only descriptors of the same type can share params!" diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py index 40ff3e5c7f..2c68afa892 100644 --- a/deepmd/pt/model/descriptor/hybrid.py +++ b/deepmd/pt/model/descriptor/hybrid.py @@ -146,6 +146,11 @@ def dim_emb(self): raise RuntimeError def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ assert ( self.__class__ == base_class.__class__ ), "Only descriptors of the same type can share params!" diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index 9a9883cdb3..eddfcf4047 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -130,6 +130,11 @@ def mixed_types(self): return self.sea.mixed_types() def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ assert ( self.__class__ == base_class.__class__ ), "Only descriptors of the same type can share params!" diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index ba483ea711..4e7e516065 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -154,6 +154,11 @@ def mixed_types(self) -> bool: return False def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ assert ( self.__class__ == base_class.__class__ ), "Only descriptors of the same type can share params!" diff --git a/deepmd/pt/model/network/network.py b/deepmd/pt/model/network/network.py index 9ef7b3366a..10d0364c9b 100644 --- a/deepmd/pt/model/network/network.py +++ b/deepmd/pt/model/network/network.py @@ -575,6 +575,11 @@ def forward(self, atype): return self.embedding(atype) def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ assert ( self.__class__ == base_class.__class__ ), "Only TypeEmbedNet of the same type can share params!" 
diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index 00de5276ee..6956d2ce25 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -93,7 +93,7 @@ def __init__( self.r_differentiable = r_differentiable self.c_differentiable = c_differentiable super().__init__( - var_name="dipole" if "var_name" not in kwargs else kwargs.pop("var_name"), + var_name=kwargs.pop("var_name", "dipole"), ntypes=ntypes, dim_descrpt=dim_descrpt, neuron=neuron, diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index d752ac964c..47535580db 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -62,6 +62,11 @@ def __new__(cls, *args, **kwargs): return super().__new__(cls) def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ assert ( self.__class__ == base_class.__class__ ), "Only fitting nets of the same type can share params!" diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index 37c802613a..3c40e9f734 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -115,7 +115,7 @@ def __init__( ).view(ntypes, 1) self.shift_diag = shift_diag super().__init__( - var_name="polar" if "var_name" not in kwargs else kwargs.pop("var_name"), + var_name=kwargs.pop("var_name", "polar"), ntypes=ntypes, dim_descrpt=dim_descrpt, neuron=neuron, diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py index 52cc636c10..67f8043653 100644 --- a/deepmd/pt/train/wrapper.py +++ b/deepmd/pt/train/wrapper.py @@ -83,6 +83,11 @@ def set_trainable_params(self): param.requires_grad = trainable def share_params(self, shared_links, resume=False): + """ + Share the parameters of classes following rules defined in shared_links during multitask training. + If not start from checkpoint (resume is False), + some seperated parameters (e.g. mean and stddev) will be re-calculated across different classes. 
+ """ supported_types = ["descriptor", "fitting_net"] for shared_item in shared_links: class_name = shared_links[shared_item]["type"] From 46ee16c8dd91942329db32f32549820f1089ef62 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 18:43:55 +0800 Subject: [PATCH 25/37] set label_requirement abstractmethod --- deepmd/pt/loss/loss.py | 9 +++++++-- deepmd/pt/utils/finetune.py | 5 +---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/deepmd/pt/loss/loss.py b/deepmd/pt/loss/loss.py index 7059d76e03..925ff8f4ef 100644 --- a/deepmd/pt/loss/loss.py +++ b/deepmd/pt/loss/loss.py @@ -1,4 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + ABC, + abstractmethod, +) from typing import ( List, ) @@ -10,7 +14,7 @@ ) -class TaskLoss(torch.nn.Module): +class TaskLoss(torch.nn.Module, ABC): def __init__(self, **kwargs): """Construct loss.""" super().__init__() @@ -20,6 +24,7 @@ def forward(self, model_pred, label, natoms, learning_rate): raise NotImplementedError @property + @abstractmethod def label_requirement(self) -> List[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" - raise NotImplementedError + pass diff --git a/deepmd/pt/utils/finetune.py b/deepmd/pt/utils/finetune.py index b08dc9fbef..c8fa1e5185 100644 --- a/deepmd/pt/utils/finetune.py +++ b/deepmd/pt/utils/finetune.py @@ -19,10 +19,7 @@ def change_finetune_model_params( - ckpt & finetune_model: origin model. - config: Read from json file. """ - if multi_task: - # TODO - pass - # log.error("finetune mode need modification for multitask mode!") + # TODO need support for multitask mode if finetune_model is not None: state_dict = torch.load(finetune_model, map_location=env.DEVICE) if "model" in state_dict: From 9d18dc4f043e9af82dcbef748de6627a95737928 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 18:54:44 +0800 Subject: [PATCH 26/37] make label_requirement dynamic --- deepmd/pt/loss/ener.py | 135 +++++++++++++++++++++++++++++------------ 1 file changed, 95 insertions(+), 40 deletions(-) diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py index 648e954401..2834733112 100644 --- a/deepmd/pt/loss/ener.py +++ b/deepmd/pt/loss/ener.py @@ -30,16 +30,57 @@ def __init__( limit_pref_f=0.0, start_pref_v=0.0, limit_pref_v=0.0, + start_pref_ae: float = 0.0, + limit_pref_ae: float = 0.0, + start_pref_pf: float = 0.0, + limit_pref_pf: float = 0.0, use_l1_all: bool = False, inference=False, **kwargs, ): - """Construct a layer to compute loss on energy, force and virial.""" + r"""Construct a layer to compute loss on energy, force and virial. + + Parameters + ---------- + starter_learning_rate : float + The learning rate at the start of the training. + start_pref_e : float + The prefactor of energy loss at the start of the training. + limit_pref_e : float + The prefactor of energy loss at the end of the training. + start_pref_f : float + The prefactor of force loss at the start of the training. + limit_pref_f : float + The prefactor of force loss at the end of the training. + start_pref_v : float + The prefactor of virial loss at the start of the training. + limit_pref_v : float + The prefactor of virial loss at the end of the training. + start_pref_ae : float + The prefactor of atomic energy loss at the start of the training. + limit_pref_ae : float + The prefactor of atomic energy loss at the end of the training. 
+ start_pref_pf : float + The prefactor of atomic prefactor force loss at the start of the training. + limit_pref_pf : float + The prefactor of atomic prefactor force loss at the end of the training. + use_l1_all : bool + Whether to use L1 loss, if False (default), it will use L2 loss. + inference : bool + If true, it will output all losses found in output, ignoring the pre-factors. + **kwargs + Other keyword arguments. + """ super().__init__() self.starter_learning_rate = starter_learning_rate self.has_e = (start_pref_e != 0.0 and limit_pref_e != 0.0) or inference self.has_f = (start_pref_f != 0.0 and limit_pref_f != 0.0) or inference self.has_v = (start_pref_v != 0.0 and limit_pref_v != 0.0) or inference + + # TODO need support for atomic energy and atomic pref + self.has_ae = (start_pref_ae != 0.0 and limit_pref_ae != 0.0) or inference + self.has_pf = (start_pref_pf != 0.0 and limit_pref_pf != 0.0) or inference + self.start_pref_e = start_pref_e self.limit_pref_e = limit_pref_e self.start_pref_f = start_pref_f @@ -164,42 +205,56 @@ def forward(self, model_pred, label, natoms, learning_rate, mae=False): @property def label_requirement(self) -> List[DataRequirementItem]: """Return data label requirements needed for this loss calculation.""" - data_requirement = [ - DataRequirementItem( - "energy", - ndof=1, - atomic=False, - must=False, - high_prec=True, - ), - DataRequirementItem( - "force", - ndof=3, - atomic=True, - must=False, - high_prec=False, - ), - DataRequirementItem( - "virial", - ndof=9, - atomic=False, - must=False, - high_prec=False, - ), - DataRequirementItem( - "atom_ener", - ndof=1, - atomic=True, - must=False, - high_prec=False, - ), - DataRequirementItem( - "atom_pref", - ndof=1, - atomic=True, - must=False, - high_prec=False, - repeat=3, - ), - ] - return data_requirement + label_requirement = [] + if self.has_e: + label_requirement.append( + DataRequirementItem( + "energy", + ndof=1, + atomic=False, + must=False, + high_prec=True, + ) + ) + if self.has_f: + label_requirement.append( + DataRequirementItem( + "force", + ndof=3, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_v: + label_requirement.append( + DataRequirementItem( + "virial", + ndof=9, + atomic=False, + must=False, + high_prec=False, + ) + ) + if self.has_ae: + label_requirement.append( + DataRequirementItem( + "atom_ener", + ndof=1, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.has_pf: + label_requirement.append( + DataRequirementItem( + "atom_pref", + ndof=1, + atomic=True, + must=False, + high_prec=False, + repeat=3, + ) + ) + return label_requirement From ad7227dc65b97d7b4797f6fdae00990466f25368 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 19:11:48 +0800 Subject: [PATCH 27/37] update docs --- deepmd/pt/model/descriptor/descriptor.py | 17 +++++++++++- deepmd/pt/model/descriptor/dpa1.py | 16 +++++++++++ deepmd/pt/model/descriptor/dpa2.py | 16 +++++++++++ deepmd/pt/model/descriptor/hybrid.py | 17 +++++++++++- deepmd/pt/model/descriptor/repformers.py | 17 +++++++++++- deepmd/pt/model/descriptor/se_a.py | 34 ++++++++++++++++++++++-- deepmd/pt/model/descriptor/se_atten.py | 17 +++++++++++- deepmd/pt/model/descriptor/se_r.py | 17 +++++++++++- deepmd/pt/model/task/dipole.py | 16 +++++++++++ deepmd/pt/model/task/ener.py | 16 +++++++++++ deepmd/pt/model/task/polarizability.py | 16 +++++++++++ 11 files changed, 192 insertions(+), 7 deletions(-) diff --git a/deepmd/pt/model/descriptor/descriptor.py 
b/deepmd/pt/model/descriptor/descriptor.py index 339a716942..24c1ef4dab 100644 --- a/deepmd/pt/model/descriptor/descriptor.py +++ b/deepmd/pt/model/descriptor/descriptor.py @@ -93,7 +93,22 @@ def compute_input_stats( merged: Union[Callable[[], List[dict]], List[dict]], path: Optional[DPPath] = None, ): - """Update mean and stddev for DescriptorBlock elements.""" + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ raise NotImplementedError def get_stats(self) -> Dict[str, StatItem]: diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index ddb1d0ea05..224a24d60e 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -183,6 +183,22 @@ def compute_input_stats( merged: Union[Callable[[], List[dict]], List[dict]], path: Optional[DPPath] = None, ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ return self.se_atten.compute_input_stats(merged, path) def serialize(self) -> dict: diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index 3a4319860f..dcb381d53a 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -345,6 +345,22 @@ def compute_input_stats( merged: Union[Callable[[], List[dict]], List[dict]], path: Optional[DPPath] = None, ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
+ + """ for ii, descrpt in enumerate([self.repinit, self.repformers]): descrpt.compute_input_stats(merged, path) diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py index 2c68afa892..bf3145f9f2 100644 --- a/deepmd/pt/model/descriptor/hybrid.py +++ b/deepmd/pt/model/descriptor/hybrid.py @@ -167,7 +167,22 @@ def compute_input_stats( merged: Union[Callable[[], List[dict]], List[dict]], path: Optional[DPPath] = None, ): - """Update mean and stddev for descriptor elements.""" + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ for ii, descrpt in enumerate(self.descriptor_list): # need support for hybrid descriptors descrpt.compute_input_stats(merged, path) diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index eec07d8854..3e8bf72f77 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -285,7 +285,22 @@ def compute_input_stats( merged: Union[Callable[[], List[dict]], List[dict]], path: Optional[DPPath] = None, ): - """Update mean and stddev for descriptor elements.""" + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ env_mat_stat = EnvMatStatSe(self) if path is not None: path = path / env_mat_stat.get_hash() diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index eddfcf4047..d836b48992 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -157,7 +157,22 @@ def compute_input_stats( merged: Union[Callable[[], List[dict]], List[dict]], path: Optional[DPPath] = None, ): - """Update mean and stddev for descriptor elements.""" + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
+ + """ return self.sea.compute_input_stats(merged, path) def reinit_exclude( @@ -440,7 +455,22 @@ def compute_input_stats( merged: Union[Callable[[], List[dict]], List[dict]], path: Optional[DPPath] = None, ): - """Update mean and stddev for descriptor elements.""" + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ env_mat_stat = EnvMatStatSe(self) if path is not None: path = path / env_mat_stat.get_hash() diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py index 7165ed0cf4..c4b3757854 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -207,7 +207,22 @@ def compute_input_stats( merged: Union[Callable[[], List[dict]], List[dict]], path: Optional[DPPath] = None, ): - """Update mean and stddev for descriptor elements.""" + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ env_mat_stat = EnvMatStatSe(self) if path is not None: path = path / env_mat_stat.get_hash() diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index 4e7e516065..643d1ad558 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -191,7 +191,22 @@ def compute_input_stats( merged: Union[Callable[[], List[dict]], List[dict]], path: Optional[DPPath] = None, ): - """Update mean and stddev for descriptor elements.""" + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
+ + """ env_mat_stat = EnvMatStatSe(self) if path is not None: path = path / env_mat_stat.get_hash() diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index 6956d2ce25..7d2dd221db 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -141,6 +141,22 @@ def compute_output_stats( merged: Union[Callable[[], List[dict]], List[dict]], stat_file_path: Optional[DPPath] = None, ): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. + + """ raise NotImplementedError def forward( diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index 404b92a10a..29ed5acaad 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -145,6 +145,22 @@ def compute_output_stats( merged: Union[Callable[[], List[dict]], List[dict]], stat_file_path: Optional[DPPath] = None, ): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. + + """ if stat_file_path is not None: stat_file_path = stat_file_path / "bias_atom_e" if stat_file_path is not None and stat_file_path.is_file(): diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index 3c40e9f734..9483d1eb4a 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -168,6 +168,22 @@ def compute_output_stats( merged: Union[Callable[[], List[dict]], List[dict]], stat_file_path: Optional[DPPath] = None, ): + """ + Compute the output statistics (e.g. energy bias) for the fitting net from packed data. + + Parameters + ---------- + merged : Union[Callable[[], List[dict]], List[dict]] + - List[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], List[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + stat_file_path : Optional[DPPath] + The path to the stat file. 
+ + """ raise NotImplementedError def forward( From 35598d2d49da07a81f4680a88e16fa1b4ec8e915 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 19:16:58 +0800 Subject: [PATCH 28/37] replace lazy with functools.lru_cache --- deepmd/pt/train/training.py | 4 ++-- deepmd/pt/utils/dataloader.py | 17 ----------------- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 21a285f540..6fe8562ad6 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import functools import logging import time from copy import ( @@ -38,7 +39,6 @@ from deepmd.pt.utils.dataloader import ( BufferedIterator, get_weighted_sampler, - lazy, ) from deepmd.pt.utils.env import ( DEVICE, @@ -214,7 +214,7 @@ def get_single_model( _validation_data.add_data_requirement(_data_requirement) if not resuming: - @lazy + @functools.lru_cache def get_sample(): sampled = make_stat_input( _training_data.systems, diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index b197f46124..65a96418c9 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -262,20 +262,3 @@ def get_weighted_sampler(training_data, prob_style, sys_prob=False): with torch.device("cpu"): sampler = WeightedRandomSampler(probs, len_sampler, replacement=True) return sampler - - -class LazyFunction: - def __init__(self, func): - self.func = func - self.result = None - self.called = False - - def __call__(self, *args, **kwargs): - if not self.called: - self.result = self.func(*args, **kwargs) - self.called = True - return self.result - - -def lazy(func): - return LazyFunction(func) From c0a0cfcd9d12ecec2c3ed617a47aba3b9767c4cf Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 29 Feb 2024 19:26:06 +0800 Subject: [PATCH 29/37] Update training.py --- deepmd/pt/train/training.py | 76 +++++++++++++++---------------------- 1 file changed, 30 insertions(+), 46 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 6fe8562ad6..1e25da77fb 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -128,61 +128,45 @@ def get_opt_param(params): return opt_type, opt_param def get_data_loader(_training_data, _validation_data, _training_params): - if "auto_prob" in _training_params["training_data"]: - train_sampler = get_weighted_sampler( - _training_data, _training_params["training_data"]["auto_prob"] - ) - elif "sys_probs" in _training_params["training_data"]: - train_sampler = get_weighted_sampler( - _training_data, - _training_params["training_data"]["sys_probs"], - sys_prob=True, - ) - else: - train_sampler = get_weighted_sampler(_training_data, "prob_sys_size") - - if train_sampler is None: - log.warning( - "Sampler not specified!" - ) # None sampler will lead to a premature stop iteration. Replacement should be True in attribute of the sampler to produce expected number of items in one iteration. 
- training_dataloader = DataLoader( - _training_data, - sampler=train_sampler, - batch_size=None, - num_workers=NUM_WORKERS, # setting to 0 diverges the behavior of its iterator; should be >=1 - drop_last=False, - pin_memory=True, - ) - with torch.device("cpu"): - training_data_buffered = BufferedIterator(iter(training_dataloader)) - if _validation_data is not None: - if "auto_prob" in _training_params["validation_data"]: - valid_sampler = get_weighted_sampler( - _validation_data, - _training_params["validation_data"]["auto_prob"], + def get_dataloader_and_buffer(_data, _params): + if "auto_prob" in _training_params["training_data"]: + _sampler = get_weighted_sampler( + _data, _params["training_data"]["auto_prob"] ) - elif "sys_probs" in _training_params["validation_data"]: - valid_sampler = get_weighted_sampler( - _validation_data, - _training_params["validation_data"]["sys_probs"], + elif "sys_probs" in _training_params["training_data"]: + _sampler = get_weighted_sampler( + _data, + _params["training_data"]["sys_probs"], sys_prob=True, ) else: - valid_sampler = get_weighted_sampler( - _validation_data, "prob_sys_size" - ) - validation_dataloader = DataLoader( - _validation_data, - sampler=valid_sampler, + _sampler = get_weighted_sampler(_data, "prob_sys_size") + + if _sampler is None: + log.warning( + "Sampler not specified!" + ) # None sampler will lead to a premature stop iteration. Replacement should be True in attribute of the sampler to produce expected number of items in one iteration. + _dataloader = DataLoader( + _data, + sampler=_sampler, batch_size=None, - num_workers=min(NUM_WORKERS, 1), + num_workers=NUM_WORKERS, # setting to 0 diverges the behavior of its iterator; should be >=1 drop_last=False, pin_memory=True, ) with torch.device("cpu"): - validation_data_buffered = BufferedIterator( - iter(validation_dataloader) - ) + _data_buffered = BufferedIterator(iter(_dataloader)) + return _dataloader, _data_buffered + + training_dataloader, training_data_buffered = get_dataloader_and_buffer( + _training_data, _training_params + ) + + if _validation_data is not None: + ( + validation_dataloader, + validation_data_buffered, + ) = get_dataloader_and_buffer(_validation_data, _training_params) if _training_params.get("validation_data", None) is not None: valid_numb_batch = _training_params["validation_data"].get( "numb_btch", 1 From 66edca55617edde721a0fd6b32a45b133defa118 Mon Sep 17 00:00:00 2001 From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> Date: Thu, 29 Feb 2024 20:17:00 +0800 Subject: [PATCH 30/37] Update deepmd/pt/train/training.py Signed-off-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> --- deepmd/pt/train/training.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 1e25da77fb..bdbee19108 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -167,12 +167,7 @@ def get_dataloader_and_buffer(_data, _params): validation_dataloader, validation_data_buffered, ) = get_dataloader_and_buffer(_validation_data, _training_params) - if _training_params.get("validation_data", None) is not None: - valid_numb_batch = _training_params["validation_data"].get( - "numb_btch", 1 - ) - else: - valid_numb_batch = 1 + valid_numb_batch = _training_params["validation_data"].get("numb_btch", 1) else: validation_dataloader = None validation_data_buffered = None From d5a1549bbbdc1e9ed389d09bc79e856569e2a90f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" 
From d5a1549bbbdc1e9ed389d09bc79e856569e2a90f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 29 Feb 2024 12:17:21 +0000
Subject: [PATCH 31/37] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 deepmd/pt/train/training.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index bdbee19108..b8d13e6f25 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -167,7 +167,9 @@ def get_dataloader_and_buffer(_data, _params):
                     validation_dataloader,
                     validation_data_buffered,
                 ) = get_dataloader_and_buffer(_validation_data, _training_params)
-                valid_numb_batch = _training_params["validation_data"].get("numb_btch", 1)
+                valid_numb_batch = _training_params["validation_data"].get(
+                    "numb_btch", 1
+                )
             else:
                 validation_dataloader = None
                 validation_data_buffered = None

From e17546ad6888257926c11a1ca0929069f4264295 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 00:39:50 +0800
Subject: [PATCH 32/37] Update test_multitask.py

---
 source/tests/pt/test_multitask.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/source/tests/pt/test_multitask.py b/source/tests/pt/test_multitask.py
index 0c1abf1f44..3c0240dbdc 100644
--- a/source/tests/pt/test_multitask.py
+++ b/source/tests/pt/test_multitask.py
@@ -15,6 +15,9 @@
 from deepmd.pt.entrypoints.main import (
     get_trainer,
 )
+from deepmd.pt.utils.multi_task import (
+    preprocess_shared_params,
+)
 
 from .model.test_permutation import (
     model_dpa1,
@@ -29,7 +32,7 @@ class MultiTaskTrainTest:
     def test_multitask_train(self):
-        trainer = get_trainer(deepcopy(self.config))
+        trainer = get_trainer(deepcopy(self.config), shared_links=self.shared_links)
         trainer.run()
         # check model keys
         self.assertEqual(len(trainer.wrapper.model), 2)
@@ -90,6 +93,9 @@ def setUp(self):
         ] = f"{self.stat_files}/model_2"
         self.config["training"]["numb_steps"] = 1
         self.config["training"]["save_freq"] = 1
+        self.config["model"], self.shared_links = preprocess_shared_params(
+            self.config["model"]
+        )
 
     def tearDown(self) -> None:
         MultiTaskTrainTest.tearDown(self)
@@ -125,6 +131,9 @@ def setUp(self):
         ] = f"{self.stat_files}/model_2"
         self.config["training"]["numb_steps"] = 1
         self.config["training"]["save_freq"] = 1
+        self.config["model"], self.shared_links = preprocess_shared_params(
+            self.config["model"]
+        )
 
     def tearDown(self) -> None:
         MultiTaskTrainTest.tearDown(self)
@@ -160,6 +169,9 @@ def setUp(self):
         ] = f"{self.stat_files}/model_2"
         self.config["training"]["numb_steps"] = 1
         self.config["training"]["save_freq"] = 1
+        self.config["model"], self.shared_links = preprocess_shared_params(
+            self.config["model"]
+        )
 
     def tearDown(self) -> None:
         MultiTaskTrainTest.tearDown(self)
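Note on [PATCH 32/37]: every multitask entry point now follows the same two-step recipe: preprocess the model section once, keep the returned `shared_links`, and hand both to the trainer. A condensed usage sketch distilled from the test diff (`load_multitask_config` is a hypothetical stand-in for however the JSON input is read, not a deepmd API; it must yield a dict whose `"model"` section contains a multitask `model_dict`):

    from copy import deepcopy

    from deepmd.pt.entrypoints.main import get_trainer
    from deepmd.pt.utils.multi_task import preprocess_shared_params

    config = load_multitask_config()  # hypothetical loader, not a deepmd API
    config["model"], shared_links = preprocess_shared_params(config["model"])
    trainer = get_trainer(deepcopy(config), shared_links=shared_links)
    trainer.run()

As the surrounding diffs suggest, `shared_links` records which blocks are shared between tasks, and the trainer later replays it through `wrapper.share_params(shared_links, ...)`; [PATCH 33/37] below adjusts exactly that call for DDP.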
From 1debf4f7a00c3dbe9e60237be470ff4a04a5db91 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 01:23:12 +0800
Subject: [PATCH 33/37] Fix h5py files in multitask DDP

---
 deepmd/pt/entrypoints/main.py | 13 ++++++++++---
 deepmd/pt/train/training.py   |  4 ++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py
index 12a3a01187..023bc5305e 100644
--- a/deepmd/pt/entrypoints/main.py
+++ b/deepmd/pt/entrypoints/main.py
@@ -101,7 +101,7 @@ def get_trainer(
     config["model"]["resuming"] = (finetune_model is not None) or (ckpt is not None)
 
     def prepare_trainer_input_single(
-        model_params_single, data_dict_single, loss_dict_single, suffix=""
+        model_params_single, data_dict_single, loss_dict_single, suffix="", rank=0
     ):
         training_dataset_params = data_dict_single["training_data"]
         type_split = False
@@ -115,7 +115,9 @@ def prepare_trainer_input_single(
 
         # stat files
        stat_file_path_single = data_dict_single.get("stat_file", None)
-        if stat_file_path_single is not None:
+        if rank != 0:
+            stat_file_path_single = None
+        elif stat_file_path_single is not None:
             if Path(stat_file_path_single).is_dir():
                 raise ValueError(
                     f"stat_file should be a file, not a directory: {stat_file_path_single}"
                 )
@@ -153,13 +155,17 @@ def prepare_trainer_input_single(
             stat_file_path_single,
         )
 
+    rank = dist.get_rank() if dist.is_initialized() else 0
     if not multi_task:
         (
             train_data,
             validation_data,
             stat_file_path,
         ) = prepare_trainer_input_single(
-            config["model"], config["training"], config["loss"]
+            config["model"],
+            config["training"],
+            config["loss"],
+            rank=rank,
         )
     else:
         train_data, validation_data, stat_file_path = {}, {}, {}
@@ -173,6 +179,7 @@ def prepare_trainer_input_single(
             config["training"]["data_dict"][model_key],
             config["loss_dict"][model_key],
             suffix=f"_{model_key}",
+            rank=rank,
         )
 
     trainer = training.Trainer(
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index b8d13e6f25..1003b499d6 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -193,7 +193,7 @@ def get_single_model(
             _training_data.add_data_requirement(_data_requirement)
             if _validation_data is not None:
                 _validation_data.add_data_requirement(_data_requirement)
-            if not resuming:
+            if not resuming and self.rank == 0:
 
                 @functools.lru_cache
                 def get_sample():
@@ -429,7 +429,7 @@ def get_loss(loss_params, start_lr, _ntypes):
 
         # Multi-task share params
        if shared_links is not None:
-            self.wrapper.share_params(shared_links, resume=resuming)
+            self.wrapper.share_params(shared_links, resume=resuming or self.rank != 0)
 
         if dist.is_initialized():
             torch.cuda.set_device(LOCAL_RANK)

From db31edc0d408adf9ad3542d8589d8fbaedcd7a44 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 01:47:21 +0800
Subject: [PATCH 34/37] Fix h5py file read block

---
 deepmd/pt/train/training.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 1003b499d6..ef8a53e656 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -63,6 +63,10 @@
     DataLoader,
 )
 
+from deepmd.utils.path import (
+    DPH5Path,
+)
+
 log = logging.getLogger(__name__)
 
 
@@ -208,6 +212,8 @@ def get_sample():
                 sampled_func=get_sample,
                 stat_file_path=_stat_file_path,
             )
+            if isinstance(_stat_file_path, DPH5Path):
+                _stat_file_path.root.close()
             return model
 
         def get_lr(lr_params):

From 3dfc31ee3beec282fbab4f838586cfb002a4498e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 1 Mar 2024 03:24:30 +0000
Subject: [PATCH 35/37] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 deepmd/pt/model/descriptor/hybrid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py
index 5dd550d8e3..9952c4766a 100644
--- a/deepmd/pt/model/descriptor/hybrid.py
+++ b/deepmd/pt/model/descriptor/hybrid.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from typing import (
-    Callable,
     Any,
+    Callable,
     Dict,
     List,
     Optional,
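Note on [PATCH 33/37] and [PATCH 34/37]: HDF5 uses file locking by default, so a stat file held open by one process can block every other rank that tries to read it. The two patches apply the usual discipline: a single writer (rank 0; all other ranks get a `None` stat path), and the handle is closed as soon as the statistics are on disk. A generic sketch of that discipline in plain h5py, independent of the deepmd wrappers:

    import h5py
    import numpy as np

    def write_stats(path: str, mean: np.ndarray) -> None:
        f = h5py.File(path, "w")  # holds the HDF5 write lock while open
        try:
            f.create_dataset("mean", data=mean)
            f.flush()  # push buffers to disk for whoever reads next
        finally:
            f.close()  # releases the handle and the file lock

    write_stats("stat.h5", np.zeros(3))
    with h5py.File("stat.h5", "r") as f:  # now safe to reopen elsewhere
        assert f["mean"].shape == (3,)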
From 615446f8cf0768f15750f556db213af5a3752019 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 12:16:35 +0800
Subject: [PATCH 36/37] Update hybrid.py

---
 deepmd/dpmodel/descriptor/hybrid.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/deepmd/dpmodel/descriptor/hybrid.py b/deepmd/dpmodel/descriptor/hybrid.py
index d2620fdcf7..46f2616b84 100644
--- a/deepmd/dpmodel/descriptor/hybrid.py
+++ b/deepmd/dpmodel/descriptor/hybrid.py
@@ -127,6 +127,14 @@ def mixed_types(self):
         """
         return any(descrpt.mixed_types() for descrpt in self.descrpt_list)
 
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not starting from a checkpoint (resume is False),
+        some separated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        raise NotImplementedError
+
     def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None):
         """Update mean and stddev for descriptor elements."""
         for descrpt in self.descrpt_list:

From e26c118cbbb72f8e81662bab7c69cd12e8dc7b36 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 14:04:39 +0800
Subject: [PATCH 37/37] Update hybrid.py

---
 deepmd/pt/model/descriptor/hybrid.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py
index 9952c4766a..b53adca462 100644
--- a/deepmd/pt/model/descriptor/hybrid.py
+++ b/deepmd/pt/model/descriptor/hybrid.py
@@ -140,6 +140,23 @@ def mixed_types(self):
         """
         return any(descrpt.mixed_types() for descrpt in self.descrpt_list)
 
+    def share_params(self, base_class, shared_level, resume=False):
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not starting from a checkpoint (resume is False),
+        some separated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        assert (
+            self.__class__ == base_class.__class__
+        ), "Only descriptors of the same type can share params!"
+        if shared_level == 0:
+            for ii, des in enumerate(self.descrpt_list):
+                self.descrpt_list[ii].share_params(
+                    base_class.descrpt_list[ii], shared_level, resume=resume
+                )
+        else:
+            raise NotImplementedError
+
     def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None):
         """Update mean and stddev for descriptor elements."""
         for descrpt in self.descrpt_list:
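Closing note on [PATCH 37/37]: the hybrid descriptor shares parameters by delegating to its children pairwise, which is the natural recursion for any container module. A toy PyTorch version of the same shape (illustrative only; per the docstring above, the real deepmd `share_params` additionally re-calculates separated statistics when not resuming and supports finer-grained levels):

    import torch

    class Leaf(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(4, 4)

        def share_params(self, base_class, shared_level, resume=False):
            assert self.__class__ == base_class.__class__
            # Point at the base task's module so both tasks train one set of weights.
            self.linear = base_class.linear

    class Hybrid(torch.nn.Module):
        def __init__(self, n):
            super().__init__()
            self.descrpt_list = torch.nn.ModuleList(Leaf() for _ in range(n))

        def share_params(self, base_class, shared_level, resume=False):
            assert self.__class__ == base_class.__class__
            if shared_level == 0:  # share everything, child by child
                for ii in range(len(self.descrpt_list)):
                    self.descrpt_list[ii].share_params(
                        base_class.descrpt_list[ii], shared_level, resume=resume
                    )
            else:
                raise NotImplementedError

    base, other = Hybrid(2), Hybrid(2)
    other.share_params(base, shared_level=0)
    assert other.descrpt_list[0].linear.weight is base.descrpt_list[0].linear.weight

The pairwise loop also explains the assertion that both descriptors must be of the same class: sharing is positional, so the two `descrpt_list`s must line up one-to-one.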