feat(pt): consistent fine-tuning with init-model #3803

Merged: 38 commits, Jun 13, 2024
Changes shown below are from 7 of the 38 commits.

Commits (38)
a08ee08  feat(pt): consistent fine-tuning with init-model (iProzd, May 22, 2024)
da92310  Merge branch 'devel' into rf_finetune (iProzd, May 22, 2024)
70531cc  FIx uts (iProzd, May 22, 2024)
591de3e  Update test_finetune.py (iProzd, May 22, 2024)
7c909cb  Update test_finetune.py (iProzd, May 22, 2024)
21b77d6  Merge branch 'devel' into rf_finetune (iProzd, May 22, 2024)
5850a2f  Merge branch 'devel' into rf_finetune (iProzd, May 23, 2024)
bc8bdf8  Merge branch 'devel' into rf_finetune (iProzd, May 29, 2024)
297b5d6  Merge branch 'devel' into rf_finetune (iProzd, May 30, 2024)
638c369  Merge branch 'devel' into rf_finetune (iProzd, May 31, 2024)
a67ef2c  Update slim type (iProzd, Jun 3, 2024)
8270305  Merge branch 'devel' into rf_finetune (iProzd, Jun 3, 2024)
915707b  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 3, 2024)
951fb1e  Update main.py (iProzd, Jun 3, 2024)
c03e90a  rm extra doc (iProzd, Jun 3, 2024)
aa08e30  update uts (iProzd, Jun 3, 2024)
ddaa38d  fix uts (iProzd, Jun 3, 2024)
be1a18e  mv change_energy_bias_lower to tf (iProzd, Jun 3, 2024)
a394d97  resolve conversation (iProzd, Jun 4, 2024)
4d09586  update version (iProzd, Jun 4, 2024)
dfbf01f  Update test_cuda.yml (iProzd, Jun 4, 2024)
f5ee0ab  Revert "Update test_cuda.yml" (iProzd, Jun 4, 2024)
5664240  Merge branch 'devel' into rf_finetune (iProzd, Jun 4, 2024)
9f1d473  Merge branch 'devel' into rf_finetune (iProzd, Jun 6, 2024)
12788ab  Update deepmd/dpmodel/atomic_model/base_atomic_model.py (iProzd, Jun 6, 2024)
25909aa  Merge branch 'devel' into rf_finetune (iProzd, Jun 6, 2024)
fbe8396  Add uts for slim_type_map (iProzd, Jun 6, 2024)
7316f32  Merge branch 'devel' into rf_finetune (iProzd, Jun 7, 2024)
7c30b47  support extend type map in finetune (iProzd, Jun 8, 2024)
ab04399  Merge branch 'devel' into rf_finetune (iProzd, Jun 8, 2024)
4599213  resolve conversations (iProzd, Jun 10, 2024)
bf20853  add doc for use-pretrain-script in tf (iProzd, Jun 10, 2024)
af6c8b2  fix tebd (iProzd, Jun 10, 2024)
ad838c4  add ut for extend stat (iProzd, Jun 11, 2024)
9fac36e  Merge branch 'devel' into rf_finetune (iProzd, Jun 11, 2024)
911b043  Update deepmd/main.py (iProzd, Jun 11, 2024)
c0d57e9  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 11, 2024)
fd64ee5  Update deepmd/utils/finetune.py (iProzd, Jun 11, 2024)
9 changes: 9 additions & 0 deletions deepmd/dpmodel/atomic_model/base_atomic_model.py
@@ -110,6 +110,15 @@
]
)

def update_type_params(
self,
state_dict: Dict[str, np.ndarray],
mapping_index: List[int],
prefix: str = "",
) -> Dict[str, np.ndarray]:
"""Update the type related params when loading from pretrained model with redundant types."""
raise NotImplementedError

def forward_common_atomic(
self,
extended_coord: np.ndarray,
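Editor's note: the new update_type_params hook declared above (and repeated across the descriptors and fittings below) only raises NotImplementedError in the dpmodel reference classes. As a rough, hypothetical illustration of the intended contract, not code from this PR, a concrete implementation might index-select the type-dependent rows of its parameters with mapping_index; the helper name slim_type_rows and the type_keys argument below are assumptions made for the example.

# Hypothetical sketch of the update_type_params contract (not part of this PR).
from typing import Dict, List

import numpy as np


def slim_type_rows(
    state_dict: Dict[str, np.ndarray],
    mapping_index: List[int],
    type_keys: List[str],
    prefix: str = "",
) -> Dict[str, np.ndarray]:
    """Keep only the rows of type-indexed parameters selected by mapping_index.

    mapping_index[i] is assumed to be the index, in the (larger) pretrained
    type map, of type i in the fine-tuning type map.
    """
    new_state = dict(state_dict)
    for key in type_keys:
        full_key = prefix + key
        # Assumes axis 0 of these arrays is indexed by atom type.
        new_state[full_key] = state_dict[full_key][mapping_index]
    return new_state


# Example: a pretrained model with 4 types slimmed down to 2 surviving types.
params = {"type_embedding.weight": np.arange(4 * 3, dtype=float).reshape(4, 3)}
slimmed = slim_type_rows(params, mapping_index=[3, 0], type_keys=["type_embedding.weight"])
assert slimmed["type_embedding.weight"].shape == (2, 3)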
10 changes: 10 additions & 0 deletions deepmd/dpmodel/atomic_model/make_base_atomic_model.py
@@ -136,6 +136,16 @@
def deserialize(cls, data: dict):
pass

@abstractmethod
def update_type_params(
self,
state_dict: Dict[str, t_tensor],
mapping_index: List[int],
prefix: str = "",
) -> Dict[str, t_tensor]:
"""Update the type related params when loading from pretrained model with redundant types."""
pass

def make_atom_mask(
self,
atype: t_tensor,
10 changes: 10 additions & 0 deletions deepmd/dpmodel/descriptor/dpa1.py
@@ -29,6 +29,7 @@
from typing import (
Any,
Callable,
Dict,
List,
Optional,
Tuple,
@@ -364,6 +365,15 @@
"""
raise NotImplementedError

def update_type_params(
self,
state_dict: Dict[str, np.ndarray],
mapping_index: List[int],
prefix: str = "",
) -> Dict[str, np.ndarray]:
"""Update the type related params when loading from pretrained model with redundant types."""
raise NotImplementedError

@property
def dim_out(self):
return self.get_dim_out()
10 changes: 10 additions & 0 deletions deepmd/dpmodel/descriptor/dpa2.py
@@ -28,6 +28,7 @@
__version__ = "unknown"

from typing import (
Dict,
List,
Optional,
Tuple,
@@ -539,6 +540,15 @@
"""
raise NotImplementedError

def update_type_params(
self,
state_dict: Dict[str, np.ndarray],
mapping_index: List[int],
prefix: str = "",
) -> Dict[str, np.ndarray]:
"""Update the type related params when loading from pretrained model with redundant types."""
raise NotImplementedError

@property
def dim_out(self):
return self.get_dim_out()
9 changes: 9 additions & 0 deletions deepmd/dpmodel/descriptor/hybrid.py
@@ -152,6 +152,15 @@
"""
raise NotImplementedError

def update_type_params(
self,
state_dict: Dict[str, np.ndarray],
mapping_index: List[int],
prefix: str = "",
) -> Dict[str, np.ndarray]:
"""Update the type related params when loading from pretrained model with redundant types."""
raise NotImplementedError

def compute_input_stats(self, merged: List[dict], path: Optional[DPPath] = None):
"""Update mean and stddev for descriptor elements."""
for descrpt in self.descrpt_list:
11 changes: 11 additions & 0 deletions deepmd/dpmodel/descriptor/make_base_descriptor.py
@@ -5,6 +5,7 @@
)
from typing import (
Callable,
Dict,
List,
Optional,
Union,
@@ -105,6 +106,16 @@
"""
pass

@abstractmethod
def update_type_params(
self,
state_dict: Dict[str, t_tensor],
mapping_index: List[int],
prefix: str = "",
) -> Dict[str, t_tensor]:
"""Update the type related params when loading from pretrained model with redundant types."""
pass

def compute_input_stats(
self,
merged: Union[Callable[[], List[dict]], List[dict]],
10 changes: 10 additions & 0 deletions deepmd/dpmodel/descriptor/se_e2_a.py
@@ -24,6 +24,7 @@
import copy
from typing import (
Any,
Dict,
List,
Optional,
Tuple,
@@ -263,6 +264,15 @@
"""
raise NotImplementedError

def update_type_params(
self,
state_dict: Dict[str, np.ndarray],
mapping_index: List[int],
prefix: str = "",
) -> Dict[str, np.ndarray]:
"""Update the type related params when loading from pretrained model with redundant types."""
raise NotImplementedError

def get_ntypes(self) -> int:
"""Returns the number of element types."""
return self.ntypes
10 changes: 10 additions & 0 deletions deepmd/dpmodel/descriptor/se_r.py
@@ -19,6 +19,7 @@
import copy
from typing import (
Any,
Dict,
List,
Optional,
)
@@ -219,6 +220,15 @@
"""
raise NotImplementedError

def update_type_params(
self,
state_dict: Dict[str, np.ndarray],
mapping_index: List[int],
prefix: str = "",
) -> Dict[str, np.ndarray]:
"""Update the type related params when loading from pretrained model with redundant types."""
raise NotImplementedError

def get_ntypes(self) -> int:
"""Returns the number of element types."""
return self.ntypes
9 changes: 9 additions & 0 deletions deepmd/dpmodel/fitting/dipole_fitting.py
@@ -175,6 +175,15 @@
]
)

def update_type_params(
self,
state_dict: Dict[str, np.ndarray],
mapping_index: List[int],
prefix: str = "",
) -> Dict[str, np.ndarray]:
"""Update the type related params when loading from pretrained model with redundant types."""
raise NotImplementedError

def call(
self,
descriptor: np.ndarray,
9 changes: 9 additions & 0 deletions deepmd/dpmodel/fitting/general_fitting.py
@@ -270,6 +270,15 @@
obj.nets = NetworkCollection.deserialize(nets)
return obj

def update_type_params(
self,
state_dict: Dict[str, np.ndarray],
mapping_index: List[int],
prefix: str = "",
) -> Dict[str, np.ndarray]:
"""Update the type related params when loading from pretrained model with redundant types."""
raise NotImplementedError

def _call_common(
self,
descriptor: np.ndarray,
11 changes: 11 additions & 0 deletions deepmd/dpmodel/fitting/make_base_fitting.py
@@ -5,6 +5,7 @@
)
from typing import (
Dict,
List,
Optional,
)

@@ -63,6 +64,16 @@
"""Calculate fitting."""
pass

@abstractmethod
def update_type_params(
self,
state_dict: Dict[str, t_tensor],
mapping_index: List[int],
prefix: str = "",
) -> Dict[str, t_tensor]:
"""Update the type related params when loading from pretrained model with redundant types."""
pass

def compute_output_stats(self, merged):
"""Update the output bias for fitting net."""
raise NotImplementedError
9 changes: 9 additions & 0 deletions deepmd/dpmodel/fitting/polarizability_fitting.py
@@ -215,6 +215,15 @@
]
)

def update_type_params(
self,
state_dict: Dict[str, np.ndarray],
mapping_index: List[int],
prefix: str = "",
) -> Dict[str, np.ndarray]:
"""Update the type related params when loading from pretrained model with redundant types."""
raise NotImplementedError

def call(
self,
descriptor: np.ndarray,
5 changes: 5 additions & 0 deletions deepmd/main.py
@@ -255,6 +255,11 @@ def main_parser() -> argparse.ArgumentParser:
default=None,
help="Finetune the frozen pretrained model.",
)
parser_train.add_argument(
"--use-pretrain-script",
action="store_true",
help="(Supported Backend: PyTorch) Use model params in the script of the pretrained model instead of user input.",
)
parser_train.add_argument(
"-o",
"--output",
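Editor's note: with this new flag, PyTorch-backend fine-tuning would presumably be launched with something like "dp --pt train input.json --finetune pretrained.pt --use-pretrain-script", so that the model section of input.json is taken from the pretrained checkpoint instead of user input. The exact invocation is an assumption; it does not appear in this diff.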
53 changes: 19 additions & 34 deletions deepmd/pt/entrypoints/main.py
@@ -51,7 +51,7 @@
DEVICE,
)
from deepmd.pt.utils.finetune import (
change_finetune_model_params,
get_finetune_rules,
)
from deepmd.pt.utils.multi_task import (
preprocess_shared_params,
@@ -78,10 +78,10 @@
init_model=None,
restart_model=None,
finetune_model=None,
model_branch="",
force_load=False,
init_frz_model=None,
shared_links=None,
finetune_links=None,
):
multi_task = "model_dict" in config.get("model", {})

@@ -92,23 +92,8 @@
assert dist.is_nccl_available()
dist.init_process_group(backend="nccl")

ckpt = init_model if init_model is not None else restart_model
finetune_links = None
if finetune_model is not None:
config["model"], finetune_links = change_finetune_model_params(
finetune_model,
config["model"],
model_branch=model_branch,
)
config["model"]["resuming"] = (finetune_model is not None) or (ckpt is not None)

def prepare_trainer_input_single(
model_params_single, data_dict_single, loss_dict_single, suffix="", rank=0
):
def prepare_trainer_input_single(model_params_single, data_dict_single, rank=0):
training_dataset_params = data_dict_single["training_data"]
type_split = False
if model_params_single["descriptor"]["type"] in ["se_e2_a"]:
type_split = True
validation_dataset_params = data_dict_single.get("validation_data", None)
validation_systems = (
validation_dataset_params["systems"] if validation_dataset_params else None
@@ -141,18 +126,11 @@
if validation_systems
else None
)
if ckpt or finetune_model:
train_data_single = DpLoaderSet(
training_systems,
training_dataset_params["batch_size"],
model_params_single["type_map"],
)
else:
train_data_single = DpLoaderSet(
training_systems,
training_dataset_params["batch_size"],
model_params_single["type_map"],
)
train_data_single = DpLoaderSet(
training_systems,
training_dataset_params["batch_size"],
model_params_single["type_map"],
)
return (
train_data_single,
validation_data_single,
@@ -168,7 +146,6 @@
) = prepare_trainer_input_single(
config["model"],
config["training"],
config["loss"],
rank=rank,
)
else:
@@ -181,8 +158,6 @@
) = prepare_trainer_input_single(
config["model"]["model_dict"][model_key],
config["training"]["data_dict"][model_key],
config["loss_dict"][model_key],
suffix=f"_{model_key}",
rank=rank,
)

@@ -242,6 +217,16 @@
if multi_task:
config["model"], shared_links = preprocess_shared_params(config["model"])

# update fine-tuning config
finetune_links = None
if FLAGS.finetune is not None:
config["model"], finetune_links = get_finetune_rules(

Check warning on line 223 in deepmd/pt/entrypoints/main.py

View check run for this annotation

Codecov / codecov/patch

deepmd/pt/entrypoints/main.py#L223

Added line #L223 was not covered by tests
FLAGS.finetune,
config["model"],
model_branch=FLAGS.model_branch,
change_model_params=FLAGS.use_pretrain_script,
)

# argcheck
if not multi_task:
config = update_deepmd_input(config, warning=True, dump="input_v2_compat.json")
@@ -276,10 +261,10 @@
FLAGS.init_model,
FLAGS.restart,
FLAGS.finetune,
FLAGS.model_branch,
FLAGS.force_load,
FLAGS.init_frz_model,
shared_links=shared_links,
finetune_links=finetune_links,
)
trainer.run()

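Editor's note: in the updated entry point above, the fine-tuning configuration is resolved up front by get_finetune_rules, which returns the (possibly modified) model config together with finetune_links that are later handed to the trainer. The sketch below is a hypothetical illustration of the type-map bookkeeping such rules imply (matching the fine-tuning type map against the pretrained one and flagging types that would require extending it); the function name map_type_indices is an assumption and this is not the implementation in deepmd/pt/utils/finetune.py.

# Hypothetical illustration only; not the implementation of get_finetune_rules.
from typing import List, Tuple


def map_type_indices(
    pretrained_type_map: List[str], finetune_type_map: List[str]
) -> Tuple[List[int], List[str]]:
    """Return, for each fine-tuning type, its index in the pretrained type map,
    plus the types missing from the pretrained model (which would need the
    type map to be extended)."""
    index, missing = [], []
    for elem in finetune_type_map:
        if elem in pretrained_type_map:
            index.append(pretrained_type_map.index(elem))
        else:
            missing.append(elem)
    return index, missing


# Example: pretrained on H/C/N/O, fine-tuned on a water dataset typed as O/H.
idx, missing = map_type_indices(["H", "C", "N", "O"], ["O", "H"])
# idx == [3, 0]; missing == []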
1 change: 0 additions & 1 deletion deepmd/pt/infer/deep_eval.py
@@ -118,7 +118,6 @@ def __init__(
item.replace(f"model.{head}.", "model.Default.")
] = state_dict[item].clone()
state_dict = state_dict_head
self.input_param["resuming"] = True
model = get_model(self.input_param).to(DEVICE)
model = torch.jit.script(model)
self.dp = ModelWrapper(model)
1 change: 0 additions & 1 deletion deepmd/pt/infer/inference.py
@@ -56,7 +56,6 @@ def __init__(
state_dict = state_dict_head

self.model_params = deepcopy(model_params)
model_params["resuming"] = True
self.model = get_model(model_params).to(DEVICE)

# Model Wrapper