From dfc174bdbad97b586cca0dc3fb188f436b295e03 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 14 May 2024 07:52:18 +0000 Subject: [PATCH 1/5] add fix --- src/transformers/testing_utils.py | 4 + tests/models/bark/test_modeling_bark.py | 7 +- tests/models/gemma/test_modeling_gemma.py | 113 +++++++++++------- tests/models/kosmos2/test_modeling_kosmos2.py | 8 +- tests/models/llama/test_modeling_llama.py | 43 ++++--- tests/models/mixtral/test_modeling_mixtral.py | 91 +++++++++----- .../perceiver/test_modeling_perceiver.py | 12 +- tests/trainer/test_trainer.py | 5 +- tests/trainer/test_trainer_seq2seq.py | 1 - 9 files changed, 188 insertions(+), 96 deletions(-) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 23ebc3c17863..045095c600c2 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -38,6 +38,7 @@ from unittest import mock from unittest.mock import patch +import torch import urllib3 from transformers import logging as transformers_logging @@ -164,6 +165,9 @@ # Not critical, only usable on the sandboxed CI instance. TOKEN = "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL" +IS_ROCM_SYSTEM = torch.version.hip is not None +IS_CUDA_SYSTEM = torch.version.cuda is not None + def parse_flag_from_env(key, default=False): try: diff --git a/tests/models/bark/test_modeling_bark.py b/tests/models/bark/test_modeling_bark.py index 476031068f49..bfc6a4dadf9a 100644 --- a/tests/models/bark/test_modeling_bark.py +++ b/tests/models/bark/test_modeling_bark.py @@ -1327,4 +1327,9 @@ def test_generate_end_to_end_with_offload(self): output_with_offload = self.model.generate(**input_ids, do_sample=False, temperature=1.0) # checks if same output - self.assertListEqual(output_with_no_offload.tolist(), output_with_offload.tolist()) + self.assertListAlmostEqual(output_with_no_offload.squeeze().tolist(), output_with_offload.squeeze().tolist()) + + def assertListAlmostEqual(self, list1, list2, tol=1e-6): + self.assertEqual(len(list1), len(list2)) + for a, b in zip(list1, list2): + self.assertAlmostEqual(a, b, delta=tol) diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py index 80f275e54ce8..6128f746a057 100644 --- a/tests/models/gemma/test_modeling_gemma.py +++ b/tests/models/gemma/test_modeling_gemma.py @@ -20,6 +20,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, GemmaConfig, is_torch_available from transformers.testing_utils import ( + IS_ROCM_SYSTEM, is_flaky, require_bitsandbytes, require_flash_attn, @@ -570,16 +571,24 @@ def test_model_2b_fp16_static_cache(self): @require_read_token def test_model_2b_bf16(self): model_id = "google/gemma-2b" - EXPECTED_TEXTS = { - 7: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Khichdi", - ], - 8: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } + if IS_ROCM_SYSTEM: + EXPECTED_TEXTS = { + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } + else: + EXPECTED_TEXTS = { + 7: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Khichdi", + ], 
+ 8: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to( torch_device @@ -596,16 +605,24 @@ def test_model_2b_bf16(self): @require_read_token def test_model_2b_eager(self): model_id = "google/gemma-2b" - EXPECTED_TEXTS = { - 7: [ - "Hello I am doing a project on the 1990s and I am looking for some information on the ", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - 8: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } + if IS_ROCM_SYSTEM: + EXPECTED_TEXTS = { + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } + else: + EXPECTED_TEXTS = { + 7: [ + "Hello I am doing a project on the 1990s and I am looking for some information on the ", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + 8: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } model = AutoModelForCausalLM.from_pretrained( model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, attn_implementation="eager" @@ -624,16 +641,24 @@ def test_model_2b_eager(self): @require_read_token def test_model_2b_sdpa(self): model_id = "google/gemma-2b" - EXPECTED_TEXTS = { - 7: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Khichdi", - ], - 8: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } + if IS_ROCM_SYSTEM: + EXPECTED_TEXTS = { + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } + else: + EXPECTED_TEXTS = { + 7: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Khichdi", + ], + 8: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } model = AutoModelForCausalLM.from_pretrained( model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, attn_implementation="sdpa" @@ -732,16 +757,24 @@ def test_model_7b_fp16(self): @require_read_token def test_model_7b_bf16(self): model_id = "google/gemma-7b" - EXPECTED_TEXTS = { - 7: [ - """Hello I am doing a project on a 1991 240sx and I am trying to find""", - "Hi today I am going to show you how to make a very simple and easy to make a very simple and", - ], - 8: [ - "Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", - "Hi today I am going to show you how to make a very simple and easy to make a very simple and", - ], - } + if IS_ROCM_SYSTEM: + EXPECTED_TEXTS = { + 9: [ 
+ "Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees", + "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign", + ], + } + else: + EXPECTED_TEXTS = { + 7: [ + """Hello I am doing a project on a 1991 240sx and I am trying to find""", + "Hi today I am going to show you how to make a very simple and easy to make a very simple and", + ], + 8: [ + "Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", + "Hi today I am going to show you how to make a very simple and easy to make a very simple and", + ], + } model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to( torch_device diff --git a/tests/models/kosmos2/test_modeling_kosmos2.py b/tests/models/kosmos2/test_modeling_kosmos2.py index 9bc95b8bd44c..ca944d0df034 100644 --- a/tests/models/kosmos2/test_modeling_kosmos2.py +++ b/tests/models/kosmos2/test_modeling_kosmos2.py @@ -26,7 +26,7 @@ from transformers import AutoModelForVision2Seq, AutoProcessor, Kosmos2Config from transformers.models.kosmos2.configuration_kosmos2 import Kosmos2TextConfig, Kosmos2VisionConfig -from transformers.testing_utils import require_torch, require_vision, slow, torch_device +from transformers.testing_utils import IS_ROCM_SYSTEM, require_torch, require_vision, slow, torch_device from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -562,6 +562,8 @@ def test_snowman_image_captioning(self): processed_text = processed_text[0] final_text, entities = final_text_with_entities[0] + atol = 1e-4 if IS_ROCM_SYSTEM else 1e-5 + np.testing.assert_allclose( torch.concat(scores[1:4])[:3, :3].to("cpu").numpy(), np.array( @@ -571,7 +573,7 @@ def test_snowman_image_captioning(self): [-0.9352350831031799, -4.688288688659668, 6.240612983703613], ] ), - atol=1e-5, + atol=atol, ) np.testing.assert_allclose( torch.concat(scores[-3:])[-3:, -3:].to("cpu").numpy(), @@ -629,7 +631,7 @@ def test_snowman_image_captioning(self): [-0.7624598741531372, -4.771658897399902, 6.576295852661133], ] ), - atol=1e-5, + atol=atol, ) np.testing.assert_allclose( torch.concat(scores[-3:])[-3:, -3:].to("cpu").numpy(), diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index 5d402bd85994..8902c0dd81ea 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -24,6 +24,7 @@ from transformers import LlamaConfig, StaticCache, is_torch_available, set_seed from transformers.testing_utils import ( + IS_ROCM_SYSTEM, require_bitsandbytes, require_flash_attn, require_read_token, @@ -688,22 +689,32 @@ def test_compile_static_cache(self): NUM_TOKENS_TO_GENERATE = 40 # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test # was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs. - EXPECTED_TEXT_COMPLETION = { - 8: [ - "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial " - "reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " - "theory of relativ", - "My favorite all time favorite condiment is ketchup. I love it on everything. 
I love it on my eggs, " - "my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", - ], - 7: [ - "Simply put, the theory of relativity states that 1. surely nothing is faster than light.\nThe theory " - "goes that nothing travels faster than light, but the faster you go, the slower everything else will " - "be.\nThe theory of relativity", - "My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, " - "and even on a good old fashioned cheeseburger. I love it on everything. I love it so", - ], - } + if IS_ROCM_SYSTEM: + EXPECTED_TEXT_COMPLETION = { + 9: [ + "Simply put, the theory of relativity states that 1) the speed of light is constant, 2) the speed" + " of lightis the same for all observers, and 3) the laws of physics are the same for all observers.", + "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs," + " my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", + ], + } + else: + EXPECTED_TEXT_COMPLETION = { + 8: [ + "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial " + "reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " + "theory of relativ", + "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, " + "my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", + ], + 7: [ + "Simply put, the theory of relativity states that 1. surely nothing is faster than light.\nThe theory " + "goes that nothing travels faster than light, but the faster you go, the slower everything else will " + "be.\nThe theory of relativity", + "My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, " + "and even on a good old fashioned cheeseburger. I love it on everything. I love it so", + ], + } prompts = [ "Simply put, the theory of relativity states that ", diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index 0d92595d8cfa..052f7066bdb5 100644 --- a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -22,6 +22,7 @@ from transformers import MixtralConfig, is_torch_available from transformers.testing_utils import ( + IS_ROCM_SYSTEM, is_flaky, require_flash_attn, require_torch, @@ -529,14 +530,21 @@ def test_small_model_logits(self): ) # TODO: might need to tweak it in case the logits do not match on our daily runners # these logits have been obtained with the original megablocks impelmentation. 
- EXPECTED_LOGITS = { - 7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to( - torch_device - ), - 8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to( - torch_device - ), - } + if IS_ROCM_SYSTEM: + EXPECTED_LOGITS = { + 9: torch.Tensor([[0.1641, 0.1621, 0.6094], [-0.8945, -0.1631, -0.6094], [0.1572, 0.1260, 0.7227]]).to( + torch_device + ), + } + else: + EXPECTED_LOGITS = { + 7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to( + torch_device + ), + 8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to( + torch_device + ), + } with torch.no_grad(): logits = model(dummy_input).logits @@ -559,32 +567,51 @@ def test_small_model_logits_batched(self): ) # TODO: might need to tweak it in case the logits do not match on our daily runners - EXPECTED_LOGITS_LEFT = { - 7: torch.Tensor( - [[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]], - ).to(torch_device), - 8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to( - torch_device - ), - } + if IS_ROCM_SYSTEM: + EXPECTED_LOGITS_LEFT = { + 9: torch.Tensor([[0.1904, 0.0513, 0.7227], [0.1943, 0.0518, 0.7227], [0.1982, 0.0557, 0.7148]]).to( + torch_device + ), + } - EXPECTED_LOGITS_LEFT_UNPADDED = { - 7: torch.Tensor( - [[0.2212, 0.5200, -0.3816], [0.8213, -0.2313, 0.6069], [0.2664, -0.7090, 0.2468]], - ).to(torch_device), - 8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to( - torch_device - ), - } + EXPECTED_LOGITS_LEFT_UNPADDED = { + 9: torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to( + torch_device + ), + } - EXPECTED_LOGITS_RIGHT_UNPADDED = { - 7: torch.Tensor([[0.2205, 0.1232, -0.1611], [-0.3484, 0.3030, -1.0312], [0.0742, 0.7930, 0.7969]]).to( - torch_device - ), - 8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to( - torch_device - ), - } + EXPECTED_LOGITS_RIGHT_UNPADDED = { + 9: torch.Tensor([[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to( + torch_device + ), + } + else: + EXPECTED_LOGITS_LEFT = { + 7: torch.Tensor( + [[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]], + ).to(torch_device), + 8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to( + torch_device + ), + } + + EXPECTED_LOGITS_LEFT_UNPADDED = { + 7: torch.Tensor( + [[0.2212, 0.5200, -0.3816], [0.8213, -0.2313, 0.6069], [0.2664, -0.7090, 0.2468]], + ).to(torch_device), + 8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to( + torch_device + ), + } + + EXPECTED_LOGITS_RIGHT_UNPADDED = { + 7: torch.Tensor([[0.2205, 0.1232, -0.1611], [-0.3484, 0.3030, -1.0312], [0.0742, 0.7930, 0.7969]]).to( + torch_device + ), + 8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to( + torch_device + ), + } with torch.no_grad(): logits = model(dummy_input, attention_mask=attention_mask).logits diff --git a/tests/models/perceiver/test_modeling_perceiver.py b/tests/models/perceiver/test_modeling_perceiver.py index fbd237bc1058..86bbc7b49d0b 100644 --- a/tests/models/perceiver/test_modeling_perceiver.py +++ b/tests/models/perceiver/test_modeling_perceiver.py @@ -26,7 +26,14 @@ from 
datasets import load_dataset from transformers import PerceiverConfig -from transformers.testing_utils import require_torch, require_torch_multi_gpu, require_vision, slow, torch_device +from transformers.testing_utils import ( + IS_ROCM_SYSTEM, + require_torch, + require_torch_multi_gpu, + require_vision, + slow, + torch_device, +) from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -930,7 +937,8 @@ def test_inference_image_classification(self): expected_slice = torch.tensor([-1.1652, -0.1992, -0.7520], device=torch_device) - self.assertTrue(torch.allclose(logits[0, :3], expected_slice, atol=1e-4)) + atol = 1e-3 if IS_ROCM_SYSTEM else 1e-4 + self.assertTrue(torch.allclose(logits[0, :3], expected_slice, atol=atol)) @slow def test_inference_image_classification_fourier(self): diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index c420da4052f1..2b2f2bb2d96a 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -2503,7 +2503,7 @@ def test_trainer_eval_mrpc(self): ) eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev") - training_args = TrainingArguments(output_dir="./examples", use_cpu=True) + training_args = TrainingArguments(output_dir="./examples", use_cpu=True, report_to="none") trainer = Trainer(model=model, args=training_args, eval_dataset=eval_dataset) result = trainer.evaluate() self.assertLess(result["eval_loss"], 0.2) @@ -2524,6 +2524,7 @@ def test_trainer_eval_multiple(self): output_dir="./examples", use_cpu=True, per_device_eval_batch_size=1, + report_to="none", ) trainer = Trainer( model=model, @@ -3059,6 +3060,8 @@ def test_end_to_end_example(self): "--predict_with_generate", "--ddp_timeout", "60", + "--report_to", + "none", ] execute_subprocess_async(command) # successful return here == success - any errors would have caused an error or a timeout in the sub-call diff --git a/tests/trainer/test_trainer_seq2seq.py b/tests/trainer/test_trainer_seq2seq.py index 61d2163b9e81..17593e2e73bf 100644 --- a/tests/trainer/test_trainer_seq2seq.py +++ b/tests/trainer/test_trainer_seq2seq.py @@ -161,7 +161,6 @@ def test_return_sequences(self): tokenizer=tokenizer, data_collator=data_collator, compute_metrics=lambda x: {"samples": x[0].shape[0]}, - report_to="none", ) def prepare_data(examples): From 9a8032dabf25d0feb0b2ec7c4b35324de125e005 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 14 May 2024 10:31:29 +0000 Subject: [PATCH 2/5] update import --- src/transformers/testing_utils.py | 8 +++++--- tests/models/llama/test_modeling_llama.py | 11 ++++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 045095c600c2..2757ea187570 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -38,7 +38,6 @@ from unittest import mock from unittest.mock import patch -import torch import urllib3 from transformers import logging as transformers_logging @@ -165,8 +164,11 @@ # Not critical, only usable on the sandboxed CI instance. 
TOKEN = "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL" -IS_ROCM_SYSTEM = torch.version.hip is not None -IS_CUDA_SYSTEM = torch.version.cuda is not None +if is_torch_available(): + import torch + + IS_ROCM_SYSTEM = torch.version.hip is not None + IS_CUDA_SYSTEM = torch.version.cuda is not None def parse_flag_from_env(key, default=False): diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index 8902c0dd81ea..8aecc584952e 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -692,12 +692,14 @@ def test_compile_static_cache(self): if IS_ROCM_SYSTEM: EXPECTED_TEXT_COMPLETION = { 9: [ - "Simply put, the theory of relativity states that 1) the speed of light is constant, 2) the speed" - " of lightis the same for all observers, and 3) the laws of physics are the same for all observers.", + "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial" + " reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " + "theory of relativ", "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs," " my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", ], } + expected_text_completion_idx = 9 else: EXPECTED_TEXT_COMPLETION = { 8: [ @@ -715,6 +717,7 @@ def test_compile_static_cache(self): "and even on a good old fashioned cheeseburger. I love it on everything. I love it so", ], } + expected_text_completion_idx = 8 prompts = [ "Simply put, the theory of relativity states that ", @@ -729,7 +732,9 @@ def test_compile_static_cache(self): # Dynamic Cache generated_ids = model.generate(**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False) dynamic_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) - self.assertEqual(EXPECTED_TEXT_COMPLETION[8], dynamic_text) # Both GPU architectures have the same output + self.assertEqual( + EXPECTED_TEXT_COMPLETION[expected_text_completion_idx], dynamic_text + ) # Both GPU architectures have the same output # Static Cache generated_ids = model.generate( From 7da11b810e5cc4d69abfa1047e1b2c0e8a38fda2 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 17 May 2024 11:20:37 +0000 Subject: [PATCH 3/5] updated dicts and comments --- tests/models/gemma/test_modeling_gemma.py | 158 ++++++++++-------- tests/models/llama/test_modeling_llama.py | 59 ++++--- tests/models/mistral/test_modeling_mistral.py | 10 ++ tests/models/mixtral/test_modeling_mixtral.py | 113 ++++++------- 4 files changed, 178 insertions(+), 162 deletions(-) diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py index 3ea969a8c397..d9a1f9efc2b5 100644 --- a/tests/models/gemma/test_modeling_gemma.py +++ b/tests/models/gemma/test_modeling_gemma.py @@ -21,7 +21,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, GemmaConfig, is_torch_available from transformers.testing_utils import ( - IS_ROCM_SYSTEM, is_flaky, require_bitsandbytes, require_flash_attn, @@ -575,24 +574,25 @@ def test_model_2b_fp16_static_cache(self): @require_read_token def test_model_2b_bf16(self): model_id = "google/gemma-2b" - if IS_ROCM_SYSTEM: - EXPECTED_TEXTS = { - 9: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } - else: - EXPECTED_TEXTS = { - 7: [ - "Hello I am doing a project on 
the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Khichdi", - ], - 8: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } + + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. + EXPECTED_TEXTS = { + 7: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Khichdi", + ], + 8: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to( torch_device @@ -609,24 +609,25 @@ def test_model_2b_bf16(self): @require_read_token def test_model_2b_eager(self): model_id = "google/gemma-2b" - if IS_ROCM_SYSTEM: - EXPECTED_TEXTS = { - 9: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } - else: - EXPECTED_TEXTS = { - 7: [ - "Hello I am doing a project on the 1990s and I am looking for some information on the ", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - 8: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } + + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. 
+ EXPECTED_TEXTS = { + 7: [ + "Hello I am doing a project on the 1990s and I am looking for some information on the ", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + 8: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } model = AutoModelForCausalLM.from_pretrained( model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, attn_implementation="eager" @@ -645,24 +646,25 @@ def test_model_2b_eager(self): @require_read_token def test_model_2b_sdpa(self): model_id = "google/gemma-2b" - if IS_ROCM_SYSTEM: - EXPECTED_TEXTS = { - 9: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } - else: - EXPECTED_TEXTS = { - 7: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Khichdi", - ], - 8: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } + + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. + EXPECTED_TEXTS = { + 7: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Khichdi", + ], + 8: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } model = AutoModelForCausalLM.from_pretrained( model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, attn_implementation="sdpa" @@ -761,24 +763,25 @@ def test_model_7b_fp16(self): @require_read_token def test_model_7b_bf16(self): model_id = "google/gemma-7b" - if IS_ROCM_SYSTEM: - EXPECTED_TEXTS = { - 9: [ - "Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees", - "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign", - ], - } - else: - EXPECTED_TEXTS = { - 7: [ - """Hello I am doing a project on a 1991 240sx and I am trying to find""", - "Hi today I am going to show you how to make a very simple and easy to make a very simple and", - ], - 8: [ - "Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", - "Hi today I am going to show you how to make a very simple and easy to make a very simple and", - ], - } + + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. 
+ # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. + EXPECTED_TEXTS = { + 7: [ + """Hello I am doing a project on a 1991 240sx and I am trying to find""", + "Hi today I am going to show you how to make a very simple and easy to make a very simple and", + ], + 8: [ + "Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", + "Hi today I am going to show you how to make a very simple and easy to make a very simple and", + ], + 9: [ + "Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees", + "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign", + ], + } model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to( torch_device @@ -851,6 +854,11 @@ def test_compile_static_cache(self): NUM_TOKENS_TO_GENERATE = 40 # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test # was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs. + # + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. EXPECTED_TEXT_COMPLETION = { 8: [ "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found", @@ -860,6 +868,10 @@ def test_compile_static_cache(self): "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found", "Hi today\nI have a problem with my 2007 1.9 tdi 105bhp.\nI have a problem with the engine management light on.\nI have checked the", ], + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found", + "Hi today\nI have a problem with my 2007 1.9 tdi 105bhp.\nI have a problem with the engine management light on.\nI have checked the", + ], } prompts = ["Hello I am doing", "Hi today"] diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index 829441a87789..cf579f187470 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -24,7 +24,6 @@ from transformers import LlamaConfig, StaticCache, is_torch_available, set_seed from transformers.testing_utils import ( - IS_ROCM_SYSTEM, require_bitsandbytes, require_flash_attn, require_read_token, @@ -692,35 +691,35 @@ def test_compile_static_cache(self): NUM_TOKENS_TO_GENERATE = 40 # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test # was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs. - if IS_ROCM_SYSTEM: - EXPECTED_TEXT_COMPLETION = { - 9: [ - "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial" - " reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " - "theory of relativ", - "My favorite all time favorite condiment is ketchup. I love it on everything. 
I love it on my eggs," - " my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", - ], - } - expected_text_completion_idx = 9 - else: - EXPECTED_TEXT_COMPLETION = { - 8: [ - "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial " - "reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " - "theory of relativ", - "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, " - "my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", - ], - 7: [ - "Simply put, the theory of relativity states that 1. surely nothing is faster than light.\nThe theory " - "goes that nothing travels faster than light, but the faster you go, the slower everything else will " - "be.\nThe theory of relativity", - "My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, " - "and even on a good old fashioned cheeseburger. I love it on everything. I love it so", - ], - } - expected_text_completion_idx = 8 + # + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. + EXPECTED_TEXT_COMPLETION = { + 8: [ + "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial " + "reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " + "theory of relativ", + "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, " + "my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", + ], + 7: [ + "Simply put, the theory of relativity states that 1. surely nothing is faster than light.\nThe theory " + "goes that nothing travels faster than light, but the faster you go, the slower everything else will " + "be.\nThe theory of relativity", + "My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, " + "and even on a good old fashioned cheeseburger. I love it on everything. I love it so", + ], + 9: [ + "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial" + " reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " + "theory of relativ", + "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs," + " my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", + ], + } + expected_text_completion_idx = 8 prompts = [ "Simply put, the theory of relativity states that ", diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index bbc36c050e23..68a70fcaa56e 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -500,9 +500,14 @@ def test_model_7b_logits(self): EXPECTED_MEAN = torch.tensor([[-2.5548, -2.5737, -3.0600, -2.5906, -2.8478, -2.8118, -2.9325, -2.7694]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in output. 
EXPECTED_SLICE = { 7: torch.tensor([-5.8781, -5.8616, -0.1052, -4.7200, -5.8781, -5.8774, -5.8773, -5.8777, -5.8781, -5.8780, -5.8781, -5.8779, -1.0787, 1.7583, -5.8779, -5.8780, -5.8783, -5.8778, -5.8776, -5.8781, -5.8784, -5.8778, -5.8778, -5.8777, -5.8779, -5.8778, -5.8776, -5.8780, -5.8779, -5.8781]), 8: torch.tensor([-5.8711, -5.8555, -0.1050, -4.7148, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -1.0781, 1.7568, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711]), + 9: torch.tensor([-5.8750, -5.8594, -0.1047, -4.7188, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -1.0781, 1.7578, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750]), } # fmt: skip print(out[0, 0, :30]) @@ -605,9 +610,14 @@ def test_model_7b_long_prompt_sdpa(self): @slow def test_speculative_generation(self): + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. EXPECTED_TEXT_COMPLETION = { 7: "My favourite condiment is 100% Sriracha. I love the heat, the tang and the fact costs", 8: "My favourite condiment is 100% Sriracha. I love the heat, the sweetness, the tang", + 9: "My favourite condiment is 100% ketchup. I love it on everything. I’m not a big", } prompt = "My favourite condiment is " tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", use_fast=False) diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index 052f7066bdb5..d90294397488 100644 --- a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -22,7 +22,6 @@ from transformers import MixtralConfig, is_torch_available from transformers.testing_utils import ( - IS_ROCM_SYSTEM, is_flaky, require_flash_attn, require_torch, @@ -530,24 +529,25 @@ def test_small_model_logits(self): ) # TODO: might need to tweak it in case the logits do not match on our daily runners # these logits have been obtained with the original megablocks impelmentation. - if IS_ROCM_SYSTEM: - EXPECTED_LOGITS = { - 9: torch.Tensor([[0.1641, 0.1621, 0.6094], [-0.8945, -0.1631, -0.6094], [0.1572, 0.1260, 0.7227]]).to( - torch_device - ), - } - else: - EXPECTED_LOGITS = { - 7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to( - torch_device - ), - 8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to( - torch_device - ), - } + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in output. 
+ EXPECTED_LOGITS = { + 7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to( + torch_device + ), + 8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to( + torch_device + ), + 9: torch.Tensor([[0.1641, 0.1621, 0.6094], [-0.8906, -0.1631, -0.6094], [0.1572, 0.1260, 0.7227]]).to( + torch_device + ), + } with torch.no_grad(): logits = model(dummy_input).logits + print(logits[:, :3, :3]) torch.testing.assert_close( logits[0, :3, :3], EXPECTED_LOGITS[self.cuda_compute_capability_major_version], atol=1e-3, rtol=1e-3 ) @@ -567,51 +567,46 @@ def test_small_model_logits_batched(self): ) # TODO: might need to tweak it in case the logits do not match on our daily runners - if IS_ROCM_SYSTEM: - EXPECTED_LOGITS_LEFT = { - 9: torch.Tensor([[0.1904, 0.0513, 0.7227], [0.1943, 0.0518, 0.7227], [0.1982, 0.0557, 0.7148]]).to( - torch_device - ), - } - - EXPECTED_LOGITS_LEFT_UNPADDED = { - 9: torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to( - torch_device - ), - } + # + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. + EXPECTED_LOGITS_LEFT = { + 7: torch.Tensor( + [[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]], + ).to(torch_device), + 8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to( + torch_device + ), + 9: torch.Tensor([[0.1904, 0.0513, 0.7227], [0.1943, 0.0518, 0.7227], [0.1982, 0.0557, 0.7148]]).to( + torch_device + ), + } - EXPECTED_LOGITS_RIGHT_UNPADDED = { - 9: torch.Tensor([[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to( - torch_device - ), - } - else: - EXPECTED_LOGITS_LEFT = { - 7: torch.Tensor( - [[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]], - ).to(torch_device), - 8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to( - torch_device - ), - } - - EXPECTED_LOGITS_LEFT_UNPADDED = { - 7: torch.Tensor( - [[0.2212, 0.5200, -0.3816], [0.8213, -0.2313, 0.6069], [0.2664, -0.7090, 0.2468]], - ).to(torch_device), - 8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to( - torch_device - ), - } + EXPECTED_LOGITS_LEFT_UNPADDED = { + 7: torch.Tensor( + [[0.2212, 0.5200, -0.3816], [0.8213, -0.2313, 0.6069], [0.2664, -0.7090, 0.2468]], + ).to(torch_device), + 8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to( + torch_device + ), + 9: torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to( + torch_device + ), + } - EXPECTED_LOGITS_RIGHT_UNPADDED = { - 7: torch.Tensor([[0.2205, 0.1232, -0.1611], [-0.3484, 0.3030, -1.0312], [0.0742, 0.7930, 0.7969]]).to( - torch_device - ), - 8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to( - torch_device - ), - } + EXPECTED_LOGITS_RIGHT_UNPADDED = { + 7: torch.Tensor([[0.2205, 0.1232, -0.1611], [-0.3484, 0.3030, -1.0312], [0.0742, 0.7930, 0.7969]]).to( + torch_device + ), + 8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to( + torch_device + ), + 9: torch.Tensor([[0.2197, 0.1250, -0.1611], 
[-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to( + torch_device + ), + } with torch.no_grad(): logits = model(dummy_input, attention_mask=attention_mask).logits From 7841c76a384c502904a609d5fcd0537a56af1486 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 17 May 2024 11:23:31 +0000 Subject: [PATCH 4/5] remove prints --- tests/models/mistral/test_modeling_mistral.py | 1 - tests/models/mixtral/test_modeling_mixtral.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index 68a70fcaa56e..0c845e1ce2d0 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -510,7 +510,6 @@ def test_model_7b_logits(self): 9: torch.tensor([-5.8750, -5.8594, -0.1047, -4.7188, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -1.0781, 1.7578, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750]), } # fmt: skip - print(out[0, 0, :30]) torch.testing.assert_close( out[0, 0, :30], EXPECTED_SLICE[self.cuda_compute_capability_major_version], atol=1e-4, rtol=1e-4 ) diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index d90294397488..1926a88a4c8b 100644 --- a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -547,7 +547,6 @@ def test_small_model_logits(self): with torch.no_grad(): logits = model(dummy_input).logits - print(logits[:, :3, :3]) torch.testing.assert_close( logits[0, :3, :3], EXPECTED_LOGITS[self.cuda_compute_capability_major_version], atol=1e-3, rtol=1e-3 ) From b64b9d5d41fe5a2ef720ddd8cb26e15b56d836a1 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 17 May 2024 23:37:50 +0530 Subject: [PATCH 5/5] Update testing_utils.py --- src/transformers/testing_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 74d3662132c5..d997bc631b0e 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -170,6 +170,9 @@ IS_ROCM_SYSTEM = torch.version.hip is not None IS_CUDA_SYSTEM = torch.version.cuda is not None +else: + IS_ROCM_SYSTEM = False + IS_CUDA_SYSTEM = False def parse_flag_from_env(key, default=False):
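
For context on the capability-keyed `EXPECTED_*` lookups these patches extend: below is a minimal sketch, assuming the same `cuda_compute_capability_major_version` attribute the existing Mixtral/Mistral integration tests populate via `torch.cuda.get_device_capability()`, of how a test resolves the per-GPU key (7 for T4, 8 for A10/A100, 9 for MI300 under ROCm). The class name, placeholder strings, and skip logic are illustrative assumptions only, not code taken from the diffs above.

```python
# Sketch (assumption, not part of the patch series): resolving a capability-keyed
# expectations dict the way the tests in this series are expected to.
import unittest

import torch


class CapabilityKeyedExpectationsSketch(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # torch.cuda.get_device_capability() also works on ROCm builds, where the
        # reported major version reflects the GCN architecture (9 for MI2xx/MI300);
        # on NVIDIA runners it is 7 (T4) or 8 (A10/A100).
        if torch.cuda.is_available():
            cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]
        else:
            cls.cuda_compute_capability_major_version = None

    def test_lookup(self):
        # Placeholder values standing in for the real EXPECTED_TEXTS / EXPECTED_LOGITS
        # entries added by the patches.
        EXPECTED_TEXTS = {
            7: "output observed on T4",
            8: "output observed on A10/A100",
            9: "output observed on MI300",
        }
        if self.cuda_compute_capability_major_version is None:
            self.skipTest("requires a CUDA or ROCm device")
        # The test body would compare generated text against the entry for this runner.
        self.assertIn(self.cuda_compute_capability_major_version, EXPECTED_TEXTS)
```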