resolve merge conflict, add weight conversion tests for fp16

huggingface · Oct 2, 2024 · 1d13301 · 1d13301
1 parent c4339ad
commit 1d13301
Showing 1 changed file with 28 additions and 1 deletion.
diff --git a/tests/quantization/ggml/test_ggml.py b/tests/quantization/ggml/test_ggml.py
@@ -533,7 +533,8 @@ def test_stablelm_fp16(self):
             self.stablelm2_model_id,
             gguf_file=self.fp16_stablelm2_model_id,
             torch_dtype=torch.float16,
-            # it needs to use the original model config as quantized one is different
+            # for a precise comparison the original model config must be used, because the
+            # quantized one differs in two parameters: use_parallel_residual and use_qkv_bias,
             # and these strongly influence the output results
             config=original_model.config,
         )
@@ -550,6 +551,32 @@ def test_stablelm_fp16(self):
             tokenizer.decode(original_out[0], skip_special_tokens=True),
         )
 
+    def test_stablelm_weights_conversion_fp16(self):
+        """Compare the weights loaded from the fp16 GGUF file with the original checkpoint.
+
+        Both models are loaded in float16 with the original model's config, then their
+        state dicts are compared entry by entry: every parameter of the original model
+        must exist in the converted model with the same shape and (close) values.
+        """
+        original_model = AutoModelForCausalLM.from_pretrained(
+            self.original_stablelm2_model_id,
+            device_map="auto",
+            torch_dtype=torch.float16,
+        )
+
+        converted_model = AutoModelForCausalLM.from_pretrained(
+            self.stablelm2_model_id,
+            gguf_file=self.fp16_stablelm2_model_id,
+            device_map="auto",
+            torch_dtype=torch.float16,
+            # for a precise comparison the original model config must be used, because the
+            # quantized one differs in two parameters: use_parallel_residual and use_qkv_bias,
+            # and these strongly influence the output results
+            config=original_model.config,
+        )
+
+        converted_state_dict = converted_model.state_dict()
+        original_state_dict = original_model.state_dict()
+
+        for layer_name, original_params in original_state_dict.items():
+            # a layer absent from the converted model must fail the test, not be skipped silently
+            self.assertIn(
+                layer_name,
+                converted_state_dict,
+                f"Layer {layer_name} is missing from the GGUF-converted model",
+            )
+            converted_params = converted_state_dict[layer_name]
+            self.assertEqual(
+                original_params.shape,
+                converted_params.shape,
+                f"Shape mismatch for layer {layer_name}",
+            )
+            torch.testing.assert_close(original_params, converted_params)
+
     def test_tokenization_xnli(self):
         import tqdm
         from datasets import load_dataset