support llama3.1 8B instruct in post training #698

Merged
merged 3 commits into from
Jan 4, 2025
Changes from all commits
@@ -90,18 +90,24 @@ def save_checkpoint(
         model_file_path.mkdir(parents=True, exist_ok=True)

         # copy the related files for inference
-        shutil.copy(
-            Path.joinpath(self._checkpoint_dir, "params.json"),
-            Path.joinpath(model_file_path, "params.json"),
-        )
-        shutil.copy(
-            Path.joinpath(self._checkpoint_dir, "tokenizer.model"),
-            Path.joinpath(model_file_path, "tokenizer.model"),
-        )
-        shutil.copy(
-            Path.joinpath(self._checkpoint_dir, "orig_params.json"),
-            Path.joinpath(model_file_path, "orig_params.json"),
-        )
+        source_path = Path.joinpath(self._checkpoint_dir, "params.json")
+        if source_path.exists():
+            shutil.copy(
+                source_path,
+                Path.joinpath(model_file_path, "params.json"),
+            )
+        source_path = Path.joinpath(self._checkpoint_dir, "tokenizer.model")
+        if source_path.exists():
+            shutil.copy(
+                source_path,
+                Path.joinpath(model_file_path, "tokenizer.model"),
+            )
+        source_path = Path.joinpath(self._checkpoint_dir, "orig_params.json")
+        if source_path.exists():
+            shutil.copy(
+                source_path,
+                Path.joinpath(model_file_path, "orig_params.json"),
+            )

         if not adapter_only:
             model_state_dict = state_dict[training.MODEL_KEY]
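The hunk above makes each inference-related file (params.json, tokenizer.model, orig_params.json) optional: it is copied next to the saved checkpoint only when it actually exists in the source checkpoint directory, presumably so that checkpoint layouts that do not ship all three files can still complete the save step. A minimal sketch of the same copy-if-exists pattern factored into one loop; the helper name _copy_inference_files is hypothetical and not part of this PR:

import shutil
from pathlib import Path

def _copy_inference_files(checkpoint_dir: Path, model_file_path: Path) -> None:
    """Copy only those inference files that the source checkpoint provides."""
    for name in ("params.json", "tokenizer.model", "orig_params.json"):
        source_path = checkpoint_dir / name
        if source_path.exists():  # skip files the checkpoint does not ship
            shutil.copy(source_path, model_file_path / name)
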
@@ -21,8 +21,9 @@

 from pydantic import BaseModel

-from torchtune.models.llama3 import llama3_tokenizer, lora_llama3_8b
+from torchtune.models.llama3 import llama3_tokenizer
 from torchtune.models.llama3._tokenizer import Llama3Tokenizer
+from torchtune.models.llama3_1 import lora_llama3_1_8b
 from torchtune.models.llama3_2 import lora_llama3_2_3b

@@ -49,8 +50,8 @@ class DatasetSchema(BaseModel):
         tokenizer_type=llama3_tokenizer,
         checkpoint_type="LLAMA3_2",
     ),
-    "Llama-3-8B-Instruct": ModelConfig(
-        model_definition=lora_llama3_8b,
+    "Llama3.1-8B-Instruct": ModelConfig(
+        model_definition=lora_llama3_1_8b,
         tokenizer_type=llama3_tokenizer,
         checkpoint_type="LLAMA3",
     ),
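
Together with the import change above, the model registry now maps the key "Llama3.1-8B-Instruct" to torchtune's lora_llama3_1_8b builder while keeping the Llama 3 tokenizer and the "LLAMA3" checkpoint format. A self-contained sketch of how such an entry could be consumed; the registry dict name, the simplified ModelConfig fields, the LoRA kwargs, and the tokenizer path are illustrative assumptions, not code from this PR:

from typing import Any
from pydantic import BaseModel
from torchtune.models.llama3 import llama3_tokenizer
from torchtune.models.llama3_1 import lora_llama3_1_8b

class ModelConfig(BaseModel):
    # simplified stand-in for the ModelConfig used in the diff above
    model_definition: Any
    tokenizer_type: Any
    checkpoint_type: str

registry = {
    "Llama3.1-8B-Instruct": ModelConfig(
        model_definition=lora_llama3_1_8b,
        tokenizer_type=llama3_tokenizer,
        checkpoint_type="LLAMA3",
    ),
}

config = registry["Llama3.1-8B-Instruct"]
model = config.model_definition(             # calls lora_llama3_1_8b(...)
    lora_attn_modules=["q_proj", "v_proj"],   # attach LoRA to attention projections
    apply_lora_to_mlp=False,
    lora_rank=8,
    lora_alpha=16,
)
tokenizer = config.tokenizer_type("/path/to/tokenizer.model")  # llama3_tokenizer

Note that calling the builder instantiates the full 8B-parameter module with random weights, so running the sketch end to end needs substantial memory.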