
CI: AMD MI300 tests fix #30797

Merged
merged 7 commits on May 21, 2024

Changes from 2 commits
6 changes: 6 additions & 0 deletions src/transformers/testing_utils.py
@@ -164,6 +164,12 @@
# Not critical, only usable on the sandboxed CI instance.
TOKEN = "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL"

if is_torch_available():
    import torch

    IS_ROCM_SYSTEM = torch.version.hip is not None
    IS_CUDA_SYSTEM = torch.version.cuda is not None

Contributor:
Suggested change
else:
    IS_ROCM_SYSTEM = False
    IS_CUDA_SYSTEM = False

@mht-sharma (Contributor Author), May 17, 2024:
Done!
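
For reference, a minimal sketch of the guard as it presumably reads once the suggestion is applied (shown with the public transformers import rather than the module-internal import that testing_utils.py actually uses):

    from transformers import is_torch_available

    if is_torch_available():
        import torch

        # torch.version.hip is set only on ROCm builds; torch.version.cuda only on CUDA builds
        IS_ROCM_SYSTEM = torch.version.hip is not None
        IS_CUDA_SYSTEM = torch.version.cuda is not None
    else:
        IS_ROCM_SYSTEM = False
        IS_CUDA_SYSTEM = False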


def parse_flag_from_env(key, default=False):
try:
7 changes: 6 additions & 1 deletion tests/models/bark/test_modeling_bark.py
@@ -1327,4 +1327,9 @@ def test_generate_end_to_end_with_offload(self):
output_with_offload = self.model.generate(**input_ids, do_sample=False, temperature=1.0)

# checks if same output
self.assertListEqual(output_with_no_offload.tolist(), output_with_offload.tolist())
self.assertListAlmostEqual(output_with_no_offload.squeeze().tolist(), output_with_offload.squeeze().tolist())

def assertListAlmostEqual(self, list1, list2, tol=1e-6):
    self.assertEqual(len(list1), len(list2))
    for a, b in zip(list1, list2):
        self.assertAlmostEqual(a, b, delta=tol)
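
A self-contained sketch (hypothetical values, outside the Bark test) of how the new helper behaves: lists that differ only within the tolerance pass, where an exact list comparison would fail.

    import unittest

    class AlmostEqualExample(unittest.TestCase):
        # same helper as added to the Bark test above
        def assertListAlmostEqual(self, list1, list2, tol=1e-6):
            self.assertEqual(len(list1), len(list2))
            for a, b in zip(list1, list2):
                self.assertAlmostEqual(a, b, delta=tol)

        def test_tolerant_comparison(self):
            # values differ by 1e-7: assertListEqual would fail, this passes
            self.assertListAlmostEqual([1.0000001, 2.0], [1.0000002, 2.0])

    if __name__ == "__main__":
        unittest.main()
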
113 changes: 73 additions & 40 deletions tests/models/gemma/test_modeling_gemma.py
@@ -20,6 +20,7 @@

from transformers import AutoModelForCausalLM, AutoTokenizer, GemmaConfig, is_torch_available
from transformers.testing_utils import (
IS_ROCM_SYSTEM,
is_flaky,
require_bitsandbytes,
require_flash_attn,
@@ -570,16 +571,24 @@ def test_model_2b_fp16_static_cache(self):
@require_read_token
def test_model_2b_bf16(self):
model_id = "google/gemma-2b"
EXPECTED_TEXTS = {
7: [
"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Khichdi",
],
8: [
"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
],
}
if IS_ROCM_SYSTEM:
Contributor:
Is this logic needed? You could just add the key 9 to the EXPECTED_TEXTS dict here, no?

Contributor Author:
@younesbelkada @ydshieh I have been comparing the generate output on H100 and MI300 (both with 9 device capability) and have seen deviations in the generated text. After discussing this with AMD engineers, this is expected as long as output is sensible.

The deviation can happen because different hardware may process differently, and if there are non-linearities, minor deviations tend to get amplified.

These deviations might not manifest across all models or prompts (as seen in this particular test). But there are few tests in this PR where the output can be different and thus handled separately for ROCM.

We have a few options to address this:

  1. Since, we do not have H100 tests (?) I could merge the dict, and separate them later if necessary.
  2. Have if/else statements only for tests where the generated output is different.
  3. Distinguish the ROCM EXPECTED output for each tests as done currently in the PR
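
For illustration only, a minimal sketch of option 1 (one merged dict, no backend if/else), assuming the expected outputs are keyed by the GPU's major compute capability as reported by torch.cuda.get_device_capability(); the string values and the exact selection helper used in the test files are placeholders:

    import torch

    # hypothetical merged dict: 7 -> T4, 8 -> A100 / A10, 9 -> H100 / MI300
    EXPECTED_TEXTS = {
        7: ["expected generation on T4 ..."],
        8: ["expected generation on A100 / A10 ..."],
        9: ["expected generation on H100 / MI300 ..."],
    }

    if torch.cuda.is_available():
        major, _minor = torch.cuda.get_device_capability()
        expected = EXPECTED_TEXTS[major]  # one lookup instead of an if/else per backend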

Contributor:
OK, I understand better now, thanks for explaining! In this case, I think option 1 is better, plus adding a comment explaining that we might need to change that value for H100s in the future. What do you think?

Contributor Author:
Sounds good to me, would make the change!

Contributor:
Thanks a lot @mht-sharma!

Contributor Author:
Done!

Contributor:
Can you refactor this block so that it uses the dict directly, without the if/else statement? 🙏

Contributor Author:
Done!

EXPECTED_TEXTS = {
9: [
Collaborator:
Let's update those lines with

# 8 is for A100 / A10 and 7 for T4

so people know what 9 means.
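
For illustration, the requested annotation would presumably sit next to the dict keys, along these lines:

    EXPECTED_TEXTS = {
        # 9 is for MI300 (and H100); 8 is for A100 / A10 and 7 for T4
        9: [
            ...
        ],
    }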

Contributor Author:
Done!

"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
],
}
else:
EXPECTED_TEXTS = {
7: [
"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Khichdi",
],
8: [
"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
],
}

model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to(
torch_device
@@ -596,16 +605,24 @@ def test_model_2b_bf16(self):
@require_read_token
def test_model_2b_eager(self):
model_id = "google/gemma-2b"
EXPECTED_TEXTS = {
7: [
"Hello I am doing a project on the 1990s and I am looking for some information on the ",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
],
8: [
"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
],
}
if IS_ROCM_SYSTEM:
EXPECTED_TEXTS = {
9: [
"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
],
}
else:
EXPECTED_TEXTS = {
7: [
"Hello I am doing a project on the 1990s and I am looking for some information on the ",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
],
8: [
"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
],
}

model = AutoModelForCausalLM.from_pretrained(
model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, attn_implementation="eager"
@@ -624,16 +641,24 @@ def test_model_2b_eager(self):
@require_read_token
def test_model_2b_sdpa(self):
model_id = "google/gemma-2b"
EXPECTED_TEXTS = {
7: [
"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Khichdi",
],
8: [
"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
],
}
if IS_ROCM_SYSTEM:
Contributor:
Same here

EXPECTED_TEXTS = {
9: [
"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
],
}
else:
EXPECTED_TEXTS = {
7: [
"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Khichdi",
],
8: [
"Hello I am doing a project on the 1990s and I need to know what the most popular music",
"Hi today I am going to share with you a very easy and simple recipe of <strong><em>Kaju Kat",
],
}

model = AutoModelForCausalLM.from_pretrained(
model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, attn_implementation="sdpa"
@@ -732,16 +757,24 @@ def test_model_7b_fp16(self):
@require_read_token
def test_model_7b_bf16(self):
model_id = "google/gemma-7b"
EXPECTED_TEXTS = {
7: [
"""Hello I am doing a project on a 1991 240sx and I am trying to find""",
"Hi today I am going to show you how to make a very simple and easy to make a very simple and",
],
8: [
"Hello I am doing a project for my school and I am trying to make a program that will read a .txt file",
"Hi today I am going to show you how to make a very simple and easy to make a very simple and",
],
}
if IS_ROCM_SYSTEM:
Contributor:
Same here

EXPECTED_TEXTS = {
9: [
"Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees",
"Hi today I am going to show you how to make a very simple and easy to make DIY light up sign",
],
}
else:
EXPECTED_TEXTS = {
7: [
"""Hello I am doing a project on a 1991 240sx and I am trying to find""",
"Hi today I am going to show you how to make a very simple and easy to make a very simple and",
],
8: [
"Hello I am doing a project for my school and I am trying to make a program that will read a .txt file",
"Hi today I am going to show you how to make a very simple and easy to make a very simple and",
],
}

model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to(
torch_device
8 changes: 5 additions & 3 deletions tests/models/kosmos2/test_modeling_kosmos2.py
@@ -26,7 +26,7 @@

from transformers import AutoModelForVision2Seq, AutoProcessor, Kosmos2Config
from transformers.models.kosmos2.configuration_kosmos2 import Kosmos2TextConfig, Kosmos2VisionConfig
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import IS_ROCM_SYSTEM, require_torch, require_vision, slow, torch_device
from transformers.utils import is_torch_available, is_vision_available

from ...test_configuration_common import ConfigTester
@@ -562,6 +562,8 @@ def test_snowman_image_captioning(self):
processed_text = processed_text[0]
final_text, entities = final_text_with_entities[0]

atol = 1e-4 if IS_ROCM_SYSTEM else 1e-5

np.testing.assert_allclose(
torch.concat(scores[1:4])[:3, :3].to("cpu").numpy(),
np.array(
Expand All @@ -571,7 +573,7 @@ def test_snowman_image_captioning(self):
[-0.9352350831031799, -4.688288688659668, 6.240612983703613],
]
),
atol=1e-5,
atol=atol,
)
np.testing.assert_allclose(
torch.concat(scores[-3:])[-3:, -3:].to("cpu").numpy(),
@@ -629,7 +631,7 @@ def test_snowman_image_captioning(self):
[-0.7624598741531372, -4.771658897399902, 6.576295852661133],
]
),
atol=1e-5,
atol=atol,
)
np.testing.assert_allclose(
torch.concat(scores[-3:])[-3:, -3:].to("cpu").numpy(),
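
A tiny self-contained example (hypothetical numbers) of what the relaxed tolerance allows: a deviation of about 6e-5 passes with atol=1e-4 but would fail with the previous atol=1e-5.

    import numpy as np

    reference = np.array([6.24061, -4.68829])
    observed = np.array([6.24067, -4.68835])  # e.g. a ROCm run differing in the fifth decimal

    np.testing.assert_allclose(observed, reference, atol=1e-4)  # passes
    # np.testing.assert_allclose(observed, reference, atol=1e-5)  # would raise AssertionError
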
50 changes: 33 additions & 17 deletions tests/models/llama/test_modeling_llama.py
@@ -24,6 +24,7 @@

from transformers import LlamaConfig, StaticCache, is_torch_available, set_seed
from transformers.testing_utils import (
IS_ROCM_SYSTEM,
require_bitsandbytes,
require_flash_attn,
require_read_token,
@@ -688,22 +689,35 @@ def test_compile_static_cache(self):
NUM_TOKENS_TO_GENERATE = 40
# Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test
# was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs.
EXPECTED_TEXT_COMPLETION = {
8: [
"Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
"reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
"theory of relativ",
"My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, "
"my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
],
7: [
"Simply put, the theory of relativity states that 1. surely nothing is faster than light.\nThe theory "
"goes that nothing travels faster than light, but the faster you go, the slower everything else will "
"be.\nThe theory of relativity",
"My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, "
"and even on a good old fashioned cheeseburger. I love it on everything. I love it so",
],
}
if IS_ROCM_SYSTEM:
EXPECTED_TEXT_COMPLETION = {
Contributor:
Same here

9: [
"Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial"
" reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
"theory of relativ",
"My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs,"
" my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
],
}
expected_text_completion_idx = 9
else:
EXPECTED_TEXT_COMPLETION = {
8: [
"Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
"reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
"theory of relativ",
"My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, "
"my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
],
7: [
"Simply put, the theory of relativity states that 1. surely nothing is faster than light.\nThe theory "
"goes that nothing travels faster than light, but the faster you go, the slower everything else will "
"be.\nThe theory of relativity",
"My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, "
"and even on a good old fashioned cheeseburger. I love it on everything. I love it so",
],
}
expected_text_completion_idx = 8

prompts = [
"Simply put, the theory of relativity states that ",
@@ -718,7 +732,9 @@ def test_compile_static_cache(self):
# Dynamic Cache
generated_ids = model.generate(**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False)
dynamic_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
self.assertEqual(EXPECTED_TEXT_COMPLETION[8], dynamic_text) # Both GPU architectures have the same output
self.assertEqual(
EXPECTED_TEXT_COMPLETION[expected_text_completion_idx], dynamic_text
) # Both GPU architectures have the same output

# Static Cache
generated_ids = model.generate(
Expand Down
91 changes: 59 additions & 32 deletions tests/models/mixtral/test_modeling_mixtral.py
@@ -22,6 +22,7 @@

from transformers import MixtralConfig, is_torch_available
from transformers.testing_utils import (
IS_ROCM_SYSTEM,
is_flaky,
require_flash_attn,
require_torch,
@@ -529,14 +530,21 @@ def test_small_model_logits(self):
)
# TODO: might need to tweak it in case the logits do not match on our daily runners
# these logits have been obtained with the original megablocks impelmentation.
EXPECTED_LOGITS = {
7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to(
torch_device
),
8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to(
torch_device
),
}
if IS_ROCM_SYSTEM:
Contributor:
Same here

EXPECTED_LOGITS = {
9: torch.Tensor([[0.1641, 0.1621, 0.6094], [-0.8945, -0.1631, -0.6094], [0.1572, 0.1260, 0.7227]]).to(
torch_device
),
}
else:
EXPECTED_LOGITS = {
7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to(
torch_device
),
8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to(
torch_device
),
}
with torch.no_grad():
logits = model(dummy_input).logits

@@ -559,32 +567,51 @@ def test_small_model_logits_batched(self):
)

# TODO: might need to tweak it in case the logits do not match on our daily runners
EXPECTED_LOGITS_LEFT = {
7: torch.Tensor(
[[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]],
).to(torch_device),
8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to(
torch_device
),
}
if IS_ROCM_SYSTEM:
EXPECTED_LOGITS_LEFT = {
Contributor:
Same here

9: torch.Tensor([[0.1904, 0.0513, 0.7227], [0.1943, 0.0518, 0.7227], [0.1982, 0.0557, 0.7148]]).to(
torch_device
),
}

EXPECTED_LOGITS_LEFT_UNPADDED = {
7: torch.Tensor(
[[0.2212, 0.5200, -0.3816], [0.8213, -0.2313, 0.6069], [0.2664, -0.7090, 0.2468]],
).to(torch_device),
8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to(
torch_device
),
}
EXPECTED_LOGITS_LEFT_UNPADDED = {
9: torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to(
torch_device
),
}

EXPECTED_LOGITS_RIGHT_UNPADDED = {
7: torch.Tensor([[0.2205, 0.1232, -0.1611], [-0.3484, 0.3030, -1.0312], [0.0742, 0.7930, 0.7969]]).to(
torch_device
),
8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to(
torch_device
),
}
EXPECTED_LOGITS_RIGHT_UNPADDED = {
9: torch.Tensor([[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to(
torch_device
),
}
else:
EXPECTED_LOGITS_LEFT = {
7: torch.Tensor(
[[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]],
).to(torch_device),
8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to(
torch_device
),
}

EXPECTED_LOGITS_LEFT_UNPADDED = {
7: torch.Tensor(
[[0.2212, 0.5200, -0.3816], [0.8213, -0.2313, 0.6069], [0.2664, -0.7090, 0.2468]],
).to(torch_device),
8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to(
torch_device
),
}

EXPECTED_LOGITS_RIGHT_UNPADDED = {
7: torch.Tensor([[0.2205, 0.1232, -0.1611], [-0.3484, 0.3030, -1.0312], [0.0742, 0.7930, 0.7969]]).to(
torch_device
),
8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to(
torch_device
),
}

with torch.no_grad():
logits = model(dummy_input, attention_mask=attention_mask).logits