From dfc174bdbad97b586cca0dc3fb188f436b295e03 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 14 May 2024 07:52:18 +0000 Subject: [PATCH 1/5] add fix --- src/transformers/testing_utils.py | 4 + tests/models/bark/test_modeling_bark.py | 7 +- tests/models/gemma/test_modeling_gemma.py | 113 +++++++++++------- tests/models/kosmos2/test_modeling_kosmos2.py | 8 +- tests/models/llama/test_modeling_llama.py | 43 ++++--- tests/models/mixtral/test_modeling_mixtral.py | 91 +++++++++----- .../perceiver/test_modeling_perceiver.py | 12 +- tests/trainer/test_trainer.py | 5 +- tests/trainer/test_trainer_seq2seq.py | 1 - 9 files changed, 188 insertions(+), 96 deletions(-) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 23ebc3c17863..045095c600c2 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -38,6 +38,7 @@ from unittest import mock from unittest.mock import patch +import torch import urllib3 from transformers import logging as transformers_logging @@ -164,6 +165,9 @@ # Not critical, only usable on the sandboxed CI instance. TOKEN = "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL" +IS_ROCM_SYSTEM = torch.version.hip is not None +IS_CUDA_SYSTEM = torch.version.cuda is not None + def parse_flag_from_env(key, default=False): try: diff --git a/tests/models/bark/test_modeling_bark.py b/tests/models/bark/test_modeling_bark.py index 476031068f49..bfc6a4dadf9a 100644 --- a/tests/models/bark/test_modeling_bark.py +++ b/tests/models/bark/test_modeling_bark.py @@ -1327,4 +1327,9 @@ def test_generate_end_to_end_with_offload(self): output_with_offload = self.model.generate(**input_ids, do_sample=False, temperature=1.0) # checks if same output - self.assertListEqual(output_with_no_offload.tolist(), output_with_offload.tolist()) + self.assertListAlmostEqual(output_with_no_offload.squeeze().tolist(), output_with_offload.squeeze().tolist()) + + def assertListAlmostEqual(self, list1, list2, tol=1e-6): + self.assertEqual(len(list1), len(list2)) + for a, b in zip(list1, list2): + self.assertAlmostEqual(a, b, delta=tol) diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py index 80f275e54ce8..6128f746a057 100644 --- a/tests/models/gemma/test_modeling_gemma.py +++ b/tests/models/gemma/test_modeling_gemma.py @@ -20,6 +20,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, GemmaConfig, is_torch_available from transformers.testing_utils import ( + IS_ROCM_SYSTEM, is_flaky, require_bitsandbytes, require_flash_attn, @@ -570,16 +571,24 @@ def test_model_2b_fp16_static_cache(self): @require_read_token def test_model_2b_bf16(self): model_id = "google/gemma-2b" - EXPECTED_TEXTS = { - 7: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Khichdi", - ], - 8: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } + if IS_ROCM_SYSTEM: + EXPECTED_TEXTS = { + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } + else: + EXPECTED_TEXTS = { + 7: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Khichdi", + ], 
+ 8: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to( torch_device @@ -596,16 +605,24 @@ def test_model_2b_bf16(self): @require_read_token def test_model_2b_eager(self): model_id = "google/gemma-2b" - EXPECTED_TEXTS = { - 7: [ - "Hello I am doing a project on the 1990s and I am looking for some information on the ", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - 8: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } + if IS_ROCM_SYSTEM: + EXPECTED_TEXTS = { + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } + else: + EXPECTED_TEXTS = { + 7: [ + "Hello I am doing a project on the 1990s and I am looking for some information on the ", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + 8: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } model = AutoModelForCausalLM.from_pretrained( model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, attn_implementation="eager" @@ -624,16 +641,24 @@ def test_model_2b_eager(self): @require_read_token def test_model_2b_sdpa(self): model_id = "google/gemma-2b" - EXPECTED_TEXTS = { - 7: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Khichdi", - ], - 8: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } + if IS_ROCM_SYSTEM: + EXPECTED_TEXTS = { + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } + else: + EXPECTED_TEXTS = { + 7: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Khichdi", + ], + 8: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } model = AutoModelForCausalLM.from_pretrained( model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, attn_implementation="sdpa" @@ -732,16 +757,24 @@ def test_model_7b_fp16(self): @require_read_token def test_model_7b_bf16(self): model_id = "google/gemma-7b" - EXPECTED_TEXTS = { - 7: [ - """Hello I am doing a project on a 1991 240sx and I am trying to find""", - "Hi today I am going to show you how to make a very simple and easy to make a very simple and", - ], - 8: [ - "Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", - "Hi today I am going to show you how to make a very simple and easy to make a very simple and", - ], - } + if IS_ROCM_SYSTEM: + EXPECTED_TEXTS = { + 9: [ 
+ "Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees", + "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign", + ], + } + else: + EXPECTED_TEXTS = { + 7: [ + """Hello I am doing a project on a 1991 240sx and I am trying to find""", + "Hi today I am going to show you how to make a very simple and easy to make a very simple and", + ], + 8: [ + "Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", + "Hi today I am going to show you how to make a very simple and easy to make a very simple and", + ], + } model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to( torch_device diff --git a/tests/models/kosmos2/test_modeling_kosmos2.py b/tests/models/kosmos2/test_modeling_kosmos2.py index 9bc95b8bd44c..ca944d0df034 100644 --- a/tests/models/kosmos2/test_modeling_kosmos2.py +++ b/tests/models/kosmos2/test_modeling_kosmos2.py @@ -26,7 +26,7 @@ from transformers import AutoModelForVision2Seq, AutoProcessor, Kosmos2Config from transformers.models.kosmos2.configuration_kosmos2 import Kosmos2TextConfig, Kosmos2VisionConfig -from transformers.testing_utils import require_torch, require_vision, slow, torch_device +from transformers.testing_utils import IS_ROCM_SYSTEM, require_torch, require_vision, slow, torch_device from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -562,6 +562,8 @@ def test_snowman_image_captioning(self): processed_text = processed_text[0] final_text, entities = final_text_with_entities[0] + atol = 1e-4 if IS_ROCM_SYSTEM else 1e-5 + np.testing.assert_allclose( torch.concat(scores[1:4])[:3, :3].to("cpu").numpy(), np.array( @@ -571,7 +573,7 @@ def test_snowman_image_captioning(self): [-0.9352350831031799, -4.688288688659668, 6.240612983703613], ] ), - atol=1e-5, + atol=atol, ) np.testing.assert_allclose( torch.concat(scores[-3:])[-3:, -3:].to("cpu").numpy(), @@ -629,7 +631,7 @@ def test_snowman_image_captioning(self): [-0.7624598741531372, -4.771658897399902, 6.576295852661133], ] ), - atol=1e-5, + atol=atol, ) np.testing.assert_allclose( torch.concat(scores[-3:])[-3:, -3:].to("cpu").numpy(), diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index 5d402bd85994..8902c0dd81ea 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -24,6 +24,7 @@ from transformers import LlamaConfig, StaticCache, is_torch_available, set_seed from transformers.testing_utils import ( + IS_ROCM_SYSTEM, require_bitsandbytes, require_flash_attn, require_read_token, @@ -688,22 +689,32 @@ def test_compile_static_cache(self): NUM_TOKENS_TO_GENERATE = 40 # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test # was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs. - EXPECTED_TEXT_COMPLETION = { - 8: [ - "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial " - "reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " - "theory of relativ", - "My favorite all time favorite condiment is ketchup. I love it on everything. 
I love it on my eggs, " - "my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", - ], - 7: [ - "Simply put, the theory of relativity states that 1. surely nothing is faster than light.\nThe theory " - "goes that nothing travels faster than light, but the faster you go, the slower everything else will " - "be.\nThe theory of relativity", - "My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, " - "and even on a good old fashioned cheeseburger. I love it on everything. I love it so", - ], - } + if IS_ROCM_SYSTEM: + EXPECTED_TEXT_COMPLETION = { + 9: [ + "Simply put, the theory of relativity states that 1) the speed of light is constant, 2) the speed" + " of lightis the same for all observers, and 3) the laws of physics are the same for all observers.", + "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs," + " my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", + ], + } + else: + EXPECTED_TEXT_COMPLETION = { + 8: [ + "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial " + "reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " + "theory of relativ", + "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, " + "my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", + ], + 7: [ + "Simply put, the theory of relativity states that 1. surely nothing is faster than light.\nThe theory " + "goes that nothing travels faster than light, but the faster you go, the slower everything else will " + "be.\nThe theory of relativity", + "My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, " + "and even on a good old fashioned cheeseburger. I love it on everything. I love it so", + ], + } prompts = [ "Simply put, the theory of relativity states that ", diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index 0d92595d8cfa..052f7066bdb5 100644 --- a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -22,6 +22,7 @@ from transformers import MixtralConfig, is_torch_available from transformers.testing_utils import ( + IS_ROCM_SYSTEM, is_flaky, require_flash_attn, require_torch, @@ -529,14 +530,21 @@ def test_small_model_logits(self): ) # TODO: might need to tweak it in case the logits do not match on our daily runners # these logits have been obtained with the original megablocks impelmentation. 
- EXPECTED_LOGITS = { - 7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to( - torch_device - ), - 8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to( - torch_device - ), - } + if IS_ROCM_SYSTEM: + EXPECTED_LOGITS = { + 9: torch.Tensor([[0.1641, 0.1621, 0.6094], [-0.8945, -0.1631, -0.6094], [0.1572, 0.1260, 0.7227]]).to( + torch_device + ), + } + else: + EXPECTED_LOGITS = { + 7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to( + torch_device + ), + 8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to( + torch_device + ), + } with torch.no_grad(): logits = model(dummy_input).logits @@ -559,32 +567,51 @@ def test_small_model_logits_batched(self): ) # TODO: might need to tweak it in case the logits do not match on our daily runners - EXPECTED_LOGITS_LEFT = { - 7: torch.Tensor( - [[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]], - ).to(torch_device), - 8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to( - torch_device - ), - } + if IS_ROCM_SYSTEM: + EXPECTED_LOGITS_LEFT = { + 9: torch.Tensor([[0.1904, 0.0513, 0.7227], [0.1943, 0.0518, 0.7227], [0.1982, 0.0557, 0.7148]]).to( + torch_device + ), + } - EXPECTED_LOGITS_LEFT_UNPADDED = { - 7: torch.Tensor( - [[0.2212, 0.5200, -0.3816], [0.8213, -0.2313, 0.6069], [0.2664, -0.7090, 0.2468]], - ).to(torch_device), - 8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to( - torch_device - ), - } + EXPECTED_LOGITS_LEFT_UNPADDED = { + 9: torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to( + torch_device + ), + } - EXPECTED_LOGITS_RIGHT_UNPADDED = { - 7: torch.Tensor([[0.2205, 0.1232, -0.1611], [-0.3484, 0.3030, -1.0312], [0.0742, 0.7930, 0.7969]]).to( - torch_device - ), - 8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to( - torch_device - ), - } + EXPECTED_LOGITS_RIGHT_UNPADDED = { + 9: torch.Tensor([[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to( + torch_device + ), + } + else: + EXPECTED_LOGITS_LEFT = { + 7: torch.Tensor( + [[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]], + ).to(torch_device), + 8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to( + torch_device + ), + } + + EXPECTED_LOGITS_LEFT_UNPADDED = { + 7: torch.Tensor( + [[0.2212, 0.5200, -0.3816], [0.8213, -0.2313, 0.6069], [0.2664, -0.7090, 0.2468]], + ).to(torch_device), + 8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to( + torch_device + ), + } + + EXPECTED_LOGITS_RIGHT_UNPADDED = { + 7: torch.Tensor([[0.2205, 0.1232, -0.1611], [-0.3484, 0.3030, -1.0312], [0.0742, 0.7930, 0.7969]]).to( + torch_device + ), + 8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to( + torch_device + ), + } with torch.no_grad(): logits = model(dummy_input, attention_mask=attention_mask).logits diff --git a/tests/models/perceiver/test_modeling_perceiver.py b/tests/models/perceiver/test_modeling_perceiver.py index fbd237bc1058..86bbc7b49d0b 100644 --- a/tests/models/perceiver/test_modeling_perceiver.py +++ b/tests/models/perceiver/test_modeling_perceiver.py @@ -26,7 +26,14 @@ from 
datasets import load_dataset from transformers import PerceiverConfig -from transformers.testing_utils import require_torch, require_torch_multi_gpu, require_vision, slow, torch_device +from transformers.testing_utils import ( + IS_ROCM_SYSTEM, + require_torch, + require_torch_multi_gpu, + require_vision, + slow, + torch_device, +) from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -930,7 +937,8 @@ def test_inference_image_classification(self): expected_slice = torch.tensor([-1.1652, -0.1992, -0.7520], device=torch_device) - self.assertTrue(torch.allclose(logits[0, :3], expected_slice, atol=1e-4)) + atol = 1e-3 if IS_ROCM_SYSTEM else 1e-4 + self.assertTrue(torch.allclose(logits[0, :3], expected_slice, atol=atol)) @slow def test_inference_image_classification_fourier(self): diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index c420da4052f1..2b2f2bb2d96a 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -2503,7 +2503,7 @@ def test_trainer_eval_mrpc(self): ) eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev") - training_args = TrainingArguments(output_dir="./examples", use_cpu=True) + training_args = TrainingArguments(output_dir="./examples", use_cpu=True, report_to="none") trainer = Trainer(model=model, args=training_args, eval_dataset=eval_dataset) result = trainer.evaluate() self.assertLess(result["eval_loss"], 0.2) @@ -2524,6 +2524,7 @@ def test_trainer_eval_multiple(self): output_dir="./examples", use_cpu=True, per_device_eval_batch_size=1, + report_to="none", ) trainer = Trainer( model=model, @@ -3059,6 +3060,8 @@ def test_end_to_end_example(self): "--predict_with_generate", "--ddp_timeout", "60", + "--report_to", + "none", ] execute_subprocess_async(command) # successful return here == success - any errors would have caused an error or a timeout in the sub-call diff --git a/tests/trainer/test_trainer_seq2seq.py b/tests/trainer/test_trainer_seq2seq.py index 61d2163b9e81..17593e2e73bf 100644 --- a/tests/trainer/test_trainer_seq2seq.py +++ b/tests/trainer/test_trainer_seq2seq.py @@ -161,7 +161,6 @@ def test_return_sequences(self): tokenizer=tokenizer, data_collator=data_collator, compute_metrics=lambda x: {"samples": x[0].shape[0]}, - report_to="none", ) def prepare_data(examples): From 9a8032dabf25d0feb0b2ec7c4b35324de125e005 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 14 May 2024 10:31:29 +0000 Subject: [PATCH 2/5] update import --- src/transformers/testing_utils.py | 8 +++++--- tests/models/llama/test_modeling_llama.py | 11 ++++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 045095c600c2..2757ea187570 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -38,7 +38,6 @@ from unittest import mock from unittest.mock import patch -import torch import urllib3 from transformers import logging as transformers_logging @@ -165,8 +164,11 @@ # Not critical, only usable on the sandboxed CI instance. 
TOKEN = "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL" -IS_ROCM_SYSTEM = torch.version.hip is not None -IS_CUDA_SYSTEM = torch.version.cuda is not None +if is_torch_available(): + import torch + + IS_ROCM_SYSTEM = torch.version.hip is not None + IS_CUDA_SYSTEM = torch.version.cuda is not None def parse_flag_from_env(key, default=False): diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index 8902c0dd81ea..8aecc584952e 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -692,12 +692,14 @@ def test_compile_static_cache(self): if IS_ROCM_SYSTEM: EXPECTED_TEXT_COMPLETION = { 9: [ - "Simply put, the theory of relativity states that 1) the speed of light is constant, 2) the speed" - " of lightis the same for all observers, and 3) the laws of physics are the same for all observers.", + "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial" + " reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " + "theory of relativ", "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs," " my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", ], } + expected_text_completion_idx = 9 else: EXPECTED_TEXT_COMPLETION = { 8: [ @@ -715,6 +717,7 @@ def test_compile_static_cache(self): "and even on a good old fashioned cheeseburger. I love it on everything. I love it so", ], } + expected_text_completion_idx = 8 prompts = [ "Simply put, the theory of relativity states that ", @@ -729,7 +732,9 @@ def test_compile_static_cache(self): # Dynamic Cache generated_ids = model.generate(**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False) dynamic_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) - self.assertEqual(EXPECTED_TEXT_COMPLETION[8], dynamic_text) # Both GPU architectures have the same output + self.assertEqual( + EXPECTED_TEXT_COMPLETION[expected_text_completion_idx], dynamic_text + ) # Both GPU architectures have the same output # Static Cache generated_ids = model.generate( From 7da11b810e5cc4d69abfa1047e1b2c0e8a38fda2 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 17 May 2024 11:20:37 +0000 Subject: [PATCH 3/5] updated dicts and comments --- tests/models/gemma/test_modeling_gemma.py | 158 ++++++++++-------- tests/models/llama/test_modeling_llama.py | 59 ++++--- tests/models/mistral/test_modeling_mistral.py | 10 ++ tests/models/mixtral/test_modeling_mixtral.py | 113 ++++++------- 4 files changed, 178 insertions(+), 162 deletions(-) diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py index 3ea969a8c397..d9a1f9efc2b5 100644 --- a/tests/models/gemma/test_modeling_gemma.py +++ b/tests/models/gemma/test_modeling_gemma.py @@ -21,7 +21,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, GemmaConfig, is_torch_available from transformers.testing_utils import ( - IS_ROCM_SYSTEM, is_flaky, require_bitsandbytes, require_flash_attn, @@ -575,24 +574,25 @@ def test_model_2b_fp16_static_cache(self): @require_read_token def test_model_2b_bf16(self): model_id = "google/gemma-2b" - if IS_ROCM_SYSTEM: - EXPECTED_TEXTS = { - 9: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } - else: - EXPECTED_TEXTS = { - 7: [ - "Hello I am doing a project on 
the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Khichdi", - ], - 8: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } + + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. + EXPECTED_TEXTS = { + 7: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Khichdi", + ], + 8: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to( torch_device @@ -609,24 +609,25 @@ def test_model_2b_bf16(self): @require_read_token def test_model_2b_eager(self): model_id = "google/gemma-2b" - if IS_ROCM_SYSTEM: - EXPECTED_TEXTS = { - 9: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } - else: - EXPECTED_TEXTS = { - 7: [ - "Hello I am doing a project on the 1990s and I am looking for some information on the ", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - 8: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } + + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. 
+ EXPECTED_TEXTS = { + 7: [ + "Hello I am doing a project on the 1990s and I am looking for some information on the ", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + 8: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } model = AutoModelForCausalLM.from_pretrained( model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, attn_implementation="eager" @@ -645,24 +646,25 @@ def test_model_2b_eager(self): @require_read_token def test_model_2b_sdpa(self): model_id = "google/gemma-2b" - if IS_ROCM_SYSTEM: - EXPECTED_TEXTS = { - 9: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } - else: - EXPECTED_TEXTS = { - 7: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Khichdi", - ], - 8: [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music", - "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", - ], - } + + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. + EXPECTED_TEXTS = { + 7: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Khichdi", + ], + 8: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music", + "Hi today I am going to share with you a very easy and simple recipe of Kaju Kat", + ], + } model = AutoModelForCausalLM.from_pretrained( model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, attn_implementation="sdpa" @@ -761,24 +763,25 @@ def test_model_7b_fp16(self): @require_read_token def test_model_7b_bf16(self): model_id = "google/gemma-7b" - if IS_ROCM_SYSTEM: - EXPECTED_TEXTS = { - 9: [ - "Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees", - "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign", - ], - } - else: - EXPECTED_TEXTS = { - 7: [ - """Hello I am doing a project on a 1991 240sx and I am trying to find""", - "Hi today I am going to show you how to make a very simple and easy to make a very simple and", - ], - 8: [ - "Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", - "Hi today I am going to show you how to make a very simple and easy to make a very simple and", - ], - } + + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. 
+ # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. + EXPECTED_TEXTS = { + 7: [ + """Hello I am doing a project on a 1991 240sx and I am trying to find""", + "Hi today I am going to show you how to make a very simple and easy to make a very simple and", + ], + 8: [ + "Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", + "Hi today I am going to show you how to make a very simple and easy to make a very simple and", + ], + 9: [ + "Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees", + "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign", + ], + } model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to( torch_device @@ -851,6 +854,11 @@ def test_compile_static_cache(self): NUM_TOKENS_TO_GENERATE = 40 # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test # was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs. + # + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. EXPECTED_TEXT_COMPLETION = { 8: [ "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found", @@ -860,6 +868,10 @@ def test_compile_static_cache(self): "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found", "Hi today\nI have a problem with my 2007 1.9 tdi 105bhp.\nI have a problem with the engine management light on.\nI have checked the", ], + 9: [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found", + "Hi today\nI have a problem with my 2007 1.9 tdi 105bhp.\nI have a problem with the engine management light on.\nI have checked the", + ], } prompts = ["Hello I am doing", "Hi today"] diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index 829441a87789..cf579f187470 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -24,7 +24,6 @@ from transformers import LlamaConfig, StaticCache, is_torch_available, set_seed from transformers.testing_utils import ( - IS_ROCM_SYSTEM, require_bitsandbytes, require_flash_attn, require_read_token, @@ -692,35 +691,35 @@ def test_compile_static_cache(self): NUM_TOKENS_TO_GENERATE = 40 # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test # was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs. - if IS_ROCM_SYSTEM: - EXPECTED_TEXT_COMPLETION = { - 9: [ - "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial" - " reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " - "theory of relativ", - "My favorite all time favorite condiment is ketchup. I love it on everything. 
I love it on my eggs," - " my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", - ], - } - expected_text_completion_idx = 9 - else: - EXPECTED_TEXT_COMPLETION = { - 8: [ - "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial " - "reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " - "theory of relativ", - "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, " - "my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", - ], - 7: [ - "Simply put, the theory of relativity states that 1. surely nothing is faster than light.\nThe theory " - "goes that nothing travels faster than light, but the faster you go, the slower everything else will " - "be.\nThe theory of relativity", - "My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, " - "and even on a good old fashioned cheeseburger. I love it on everything. I love it so", - ], - } - expected_text_completion_idx = 8 + # + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. + EXPECTED_TEXT_COMPLETION = { + 8: [ + "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial " + "reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " + "theory of relativ", + "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, " + "my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", + ], + 7: [ + "Simply put, the theory of relativity states that 1. surely nothing is faster than light.\nThe theory " + "goes that nothing travels faster than light, but the faster you go, the slower everything else will " + "be.\nThe theory of relativity", + "My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, " + "and even on a good old fashioned cheeseburger. I love it on everything. I love it so", + ], + 9: [ + "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial" + " reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe " + "theory of relativ", + "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs," + " my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p", + ], + } + expected_text_completion_idx = 8 prompts = [ "Simply put, the theory of relativity states that ", diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index bbc36c050e23..68a70fcaa56e 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -500,9 +500,14 @@ def test_model_7b_logits(self): EXPECTED_MEAN = torch.tensor([[-2.5548, -2.5737, -3.0600, -2.5906, -2.8478, -2.8118, -2.9325, -2.7694]]) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2) + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in output. 
EXPECTED_SLICE = { 7: torch.tensor([-5.8781, -5.8616, -0.1052, -4.7200, -5.8781, -5.8774, -5.8773, -5.8777, -5.8781, -5.8780, -5.8781, -5.8779, -1.0787, 1.7583, -5.8779, -5.8780, -5.8783, -5.8778, -5.8776, -5.8781, -5.8784, -5.8778, -5.8778, -5.8777, -5.8779, -5.8778, -5.8776, -5.8780, -5.8779, -5.8781]), 8: torch.tensor([-5.8711, -5.8555, -0.1050, -4.7148, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -1.0781, 1.7568, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711]), + 9: torch.tensor([-5.8750, -5.8594, -0.1047, -4.7188, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -1.0781, 1.7578, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750]), } # fmt: skip print(out[0, 0, :30]) @@ -605,9 +610,14 @@ def test_model_7b_long_prompt_sdpa(self): @slow def test_speculative_generation(self): + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. EXPECTED_TEXT_COMPLETION = { 7: "My favourite condiment is 100% Sriracha. I love the heat, the tang and the fact costs", 8: "My favourite condiment is 100% Sriracha. I love the heat, the sweetness, the tang", + 9: "My favourite condiment is 100% ketchup. I love it on everything. I’m not a big", } prompt = "My favourite condiment is " tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", use_fast=False) diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index 052f7066bdb5..d90294397488 100644 --- a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -22,7 +22,6 @@ from transformers import MixtralConfig, is_torch_available from transformers.testing_utils import ( - IS_ROCM_SYSTEM, is_flaky, require_flash_attn, require_torch, @@ -530,24 +529,25 @@ def test_small_model_logits(self): ) # TODO: might need to tweak it in case the logits do not match on our daily runners # these logits have been obtained with the original megablocks impelmentation. - if IS_ROCM_SYSTEM: - EXPECTED_LOGITS = { - 9: torch.Tensor([[0.1641, 0.1621, 0.6094], [-0.8945, -0.1631, -0.6094], [0.1572, 0.1260, 0.7227]]).to( - torch_device - ), - } - else: - EXPECTED_LOGITS = { - 7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to( - torch_device - ), - 8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to( - torch_device - ), - } + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in output. 
+ EXPECTED_LOGITS = { + 7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to( + torch_device + ), + 8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to( + torch_device + ), + 9: torch.Tensor([[0.1641, 0.1621, 0.6094], [-0.8906, -0.1631, -0.6094], [0.1572, 0.1260, 0.7227]]).to( + torch_device + ), + } with torch.no_grad(): logits = model(dummy_input).logits + print(logits[:, :3, :3]) torch.testing.assert_close( logits[0, :3, :3], EXPECTED_LOGITS[self.cuda_compute_capability_major_version], atol=1e-3, rtol=1e-3 ) @@ -567,51 +567,46 @@ def test_small_model_logits_batched(self): ) # TODO: might need to tweak it in case the logits do not match on our daily runners - if IS_ROCM_SYSTEM: - EXPECTED_LOGITS_LEFT = { - 9: torch.Tensor([[0.1904, 0.0513, 0.7227], [0.1943, 0.0518, 0.7227], [0.1982, 0.0557, 0.7148]]).to( - torch_device - ), - } - - EXPECTED_LOGITS_LEFT_UNPADDED = { - 9: torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to( - torch_device - ), - } + # + # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4. + # + # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s, + # considering differences in hardware processing and potential deviations in generated text. + EXPECTED_LOGITS_LEFT = { + 7: torch.Tensor( + [[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]], + ).to(torch_device), + 8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to( + torch_device + ), + 9: torch.Tensor([[0.1904, 0.0513, 0.7227], [0.1943, 0.0518, 0.7227], [0.1982, 0.0557, 0.7148]]).to( + torch_device + ), + } - EXPECTED_LOGITS_RIGHT_UNPADDED = { - 9: torch.Tensor([[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to( - torch_device - ), - } - else: - EXPECTED_LOGITS_LEFT = { - 7: torch.Tensor( - [[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]], - ).to(torch_device), - 8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to( - torch_device - ), - } - - EXPECTED_LOGITS_LEFT_UNPADDED = { - 7: torch.Tensor( - [[0.2212, 0.5200, -0.3816], [0.8213, -0.2313, 0.6069], [0.2664, -0.7090, 0.2468]], - ).to(torch_device), - 8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to( - torch_device - ), - } + EXPECTED_LOGITS_LEFT_UNPADDED = { + 7: torch.Tensor( + [[0.2212, 0.5200, -0.3816], [0.8213, -0.2313, 0.6069], [0.2664, -0.7090, 0.2468]], + ).to(torch_device), + 8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to( + torch_device + ), + 9: torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to( + torch_device + ), + } - EXPECTED_LOGITS_RIGHT_UNPADDED = { - 7: torch.Tensor([[0.2205, 0.1232, -0.1611], [-0.3484, 0.3030, -1.0312], [0.0742, 0.7930, 0.7969]]).to( - torch_device - ), - 8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to( - torch_device - ), - } + EXPECTED_LOGITS_RIGHT_UNPADDED = { + 7: torch.Tensor([[0.2205, 0.1232, -0.1611], [-0.3484, 0.3030, -1.0312], [0.0742, 0.7930, 0.7969]]).to( + torch_device + ), + 8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to( + torch_device + ), + 9: torch.Tensor([[0.2197, 0.1250, -0.1611], 
[-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to( + torch_device + ), + } with torch.no_grad(): logits = model(dummy_input, attention_mask=attention_mask).logits From 7841c76a384c502904a609d5fcd0537a56af1486 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 17 May 2024 11:23:31 +0000 Subject: [PATCH 4/5] remove prints --- tests/models/mistral/test_modeling_mistral.py | 1 - tests/models/mixtral/test_modeling_mixtral.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index 68a70fcaa56e..0c845e1ce2d0 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -510,7 +510,6 @@ def test_model_7b_logits(self): 9: torch.tensor([-5.8750, -5.8594, -0.1047, -4.7188, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -1.0781, 1.7578, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750]), } # fmt: skip - print(out[0, 0, :30]) torch.testing.assert_close( out[0, 0, :30], EXPECTED_SLICE[self.cuda_compute_capability_major_version], atol=1e-4, rtol=1e-4 ) diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index d90294397488..1926a88a4c8b 100644 --- a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -547,7 +547,6 @@ def test_small_model_logits(self): with torch.no_grad(): logits = model(dummy_input).logits - print(logits[:, :3, :3]) torch.testing.assert_close( logits[0, :3, :3], EXPECTED_LOGITS[self.cuda_compute_capability_major_version], atol=1e-3, rtol=1e-3 ) From b64b9d5d41fe5a2ef720ddd8cb26e15b56d836a1 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 17 May 2024 23:37:50 +0530 Subject: [PATCH 5/5] Update testing_utils.py --- src/transformers/testing_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 74d3662132c5..d997bc631b0e 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -170,6 +170,9 @@ IS_ROCM_SYSTEM = torch.version.hip is not None IS_CUDA_SYSTEM = torch.version.cuda is not None +else: + IS_ROCM_SYSTEM = False + IS_CUDA_SYSTEM = False def parse_flag_from_env(key, default=False):
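
For context on the capability-keyed `EXPECTED_*` lookups these patches extend: below is a minimal sketch, assuming the same `cuda_compute_capability_major_version` attribute the existing Mixtral/Mistral integration tests populate via `torch.cuda.get_device_capability()`, of how a test resolves the per-GPU key (7 for T4, 8 for A10/A100, 9 for MI300 under ROCm). The class name, placeholder strings, and skip logic are illustrative assumptions only, not code taken from the diffs above.

```python
# Sketch (assumption, not part of the patch series): resolving a capability-keyed
# expectations dict the way the tests in this series are expected to.
import unittest

import torch


class CapabilityKeyedExpectationsSketch(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # torch.cuda.get_device_capability() also works on ROCm builds, where the
        # reported major version reflects the GCN architecture (9 for MI2xx/MI300);
        # on NVIDIA runners it is 7 (T4) or 8 (A10/A100).
        if torch.cuda.is_available():
            cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]
        else:
            cls.cuda_compute_capability_major_version = None

    def test_lookup(self):
        # Placeholder values standing in for the real EXPECTED_TEXTS / EXPECTED_LOGITS
        # entries added by the patches.
        EXPECTED_TEXTS = {
            7: "output observed on T4",
            8: "output observed on A10/A100",
            9: "output observed on MI300",
        }
        if self.cuda_compute_capability_major_version is None:
            self.skipTest("requires a CUDA or ROCm device")
        # The test body would compare generated text against the entry for this runner.
        self.assertIn(self.cuda_compute_capability_major_version, EXPECTED_TEXTS)
```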