From bfe1a58a879d010e8a83bd224570a451ae1bfae5 Mon Sep 17 00:00:00 2001 From: simonJJJ <821898965@qq.com> Date: Wed, 28 Aug 2024 17:39:57 +0800 Subject: [PATCH 1/8] qwen2vl_align_kv_seqlen_to_qwen2 --- .../models/qwen2_vl/modeling_qwen2_vl.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py b/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py index 6ab813ad9ade91..5e7919a95a7dce 100644 --- a/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py +++ b/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py @@ -550,8 +550,13 @@ def forward( kv_seq_len = key_states.shape[-2] if past_key_value is not None: - kv_seq_len += cache_position[0] + 1 - + if self.layer_idx is None: + raise ValueError( + f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} " + "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class " + "with a layer index." + ) + kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx) cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) query_states, key_states = apply_multimodal_rotary_pos_emb( query_states, key_states, cos, sin, position_ids, self.rope_scaling["mrope_section"] @@ -632,10 +637,19 @@ def forward( kv_seq_len = key_states.shape[-2] if past_key_value is not None: - kv_seq_len += cache_position[0] + 1 + if self.layer_idx is None: + raise ValueError( + f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} " + "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class " + "with a layer index." + ) + kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx) # Because the input can be padded, the absolute sequence length depends on the max position id. - rotary_seq_len = cache_position[-1] + rotary_seq_len = ( + max(kv_seq_len, position_ids[:, -1].max().item() + 1) if position_ids is not None else kv_seq_len + ) + cos, sin = self.rotary_emb(value_states, seq_len=rotary_seq_len) query_states, key_states = apply_multimodal_rotary_pos_emb( From c99c6bff298c7745d513628112e9c101fe810c2c Mon Sep 17 00:00:00 2001 From: "baishuai.bs" <1051314669@qq.com> Date: Thu, 5 Sep 2024 15:47:28 +0800 Subject: [PATCH 2/8] flash att test --- .../models/qwen2_vl/test_modeling_qwen2_vl.py | 134 +++++++++++++----- 1 file changed, 99 insertions(+), 35 deletions(-) diff --git a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py index ba3b30d94533e0..ec6444331ed2d7 100644 --- a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py +++ b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py @@ -28,7 +28,9 @@ ) from transformers.testing_utils import ( require_bitsandbytes, + require_flash_attn, require_torch, + require_torch_gpu, slow, torch_device, ) @@ -334,12 +336,12 @@ def test_small_model_integration_test(self): expected_pixel_slice = torch.tensor( [ - [0.8501, 0.8647, 0.8647], - [1.0106, 1.0106, 1.0252], - [0.9960, 1.0106, 1.0252], - [1.0982, 1.1128, 1.1274], - [1.0836, 1.0982, 1.0982], - [1.1858, 1.1858, 1.1858], + [0.8501, 0.8501, 0.8647], + [0.9376, 0.9376, 0.9376], + [0.9084, 0.9376, 0.9376], + [1.0252, 1.0252, 1.0544], + [1.0252, 1.0252, 1.0252], + [1.0836, 1.0836, 1.0836], ], dtype=torch.float32, device="cpu", @@ -350,7 +352,7 @@ def test_small_model_integration_test(self): inputs = inputs.to(torch_device) output = model.generate(**inputs, max_new_tokens=30) - EXPECTED_DECODED_TEXT = "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?assistant\nThe dog in the picture appears to be a Labrador Retriever or a similar breed. Labradors are known for their friendly and intelligent nature," + EXPECTED_DECODED_TEXT = "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices" self.assertEqual( self.processor.decode(output[0], skip_special_tokens=True), @@ -392,57 +394,119 @@ def test_small_model_integration_test_batch_wo_image(self): {"role": "user", "content": "Who are you?"}, ] text2 = self.processor.apply_chat_template(messages2, tokenize=False, add_generation_prompt=True) - inputs = self.processor(text=[text, text2], images=[self.image], return_tensors="pt").to(torch_device) + inputs = self.processor(text=[text, text2], images=[self.image], padding=True, return_tensors="pt").to( + torch_device + ) # it should not matter whether two images are the same size or not output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?assistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and outgoing personalities, as well as their", - "system\nYou are a helpful assistant.user\nWho are you?assistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with various tasks and answer a wide range of questions to", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", + "system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to answer a wide range of questions and provide information on various topics", ] self.assertEqual( - self.processor.batch_decode(output, skip_special_tokens=True), - EXPECTED_DECODED_TEXT, + self.processor.batch_decode(output[0], skip_special_tokens=True), + EXPECTED_DECODED_TEXT[0], + ) + self.assertEqual( + self.processor.batch_decode(output[1], skip_special_tokens=True), + EXPECTED_DECODED_TEXT[1], ) @slow @require_bitsandbytes def test_small_model_integration_test_batch_different_resolutions(self): model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", load_in_4bit=True) - text, vision_infos = self.processor.apply_chat_template( - self.messages, tokenize=False, add_generation_prompt=True + text = self.processor.apply_chat_template(self.messages, tokenize=False, add_generation_prompt=True) + text2 = self.processor.apply_chat_template(self.messages, tokenize=False, add_generation_prompt=True) + image2 = self.image.resize((224, 224)) + inputs = self.processor(text=[text, text2], images=[self.image, image2], padding=True, return_tensors="pt").to( + torch_device + ) + + # it should not matter whether two images are the same size or not + output = model.generate(**inputs, max_new_tokens=30) + + EXPECTED_DECODED_TEXT = [ + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Golden Retriever. Golden Retrievers are known for their friendly and affectionate nature, as well as", + ] + self.assertEqual( + self.processor.batch_decode(output[0], skip_special_tokens=True), + EXPECTED_DECODED_TEXT[0], + ) + self.assertEqual( + self.processor.batch_decode(output[1], skip_special_tokens=True), + EXPECTED_DECODED_TEXT[1], + ) + + @slow + @require_flash_attn + @require_torch_gpu + def test_small_model_integration_test_batch_flashatt2(self): + model = Qwen2VLForConditionalGeneration.from_pretrained( + "Qwen/Qwen2-VL-7B-Instruct", + torch_dtype=torch.bfloat16, + attn_implementation="flash_attention_2", + device_map="auto", + ) + text = self.processor.apply_chat_template(self.messages, tokenize=False, add_generation_prompt=True) + inputs = self.processor(text=[text, text], images=[self.image, self.image], return_tensors="pt").to( + torch_device + ) + + # it should not matter whether two images are the same size or not + output = model.generate(**inputs, max_new_tokens=30) + + EXPECTED_DECODED_TEXT = [ + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", + ] + + self.assertEqual( + self.processor.batch_decode(output[0], skip_special_tokens=True), + EXPECTED_DECODED_TEXT[0], + ) + self.assertEqual( + self.processor.batch_decode(output, skip_special_tokens=True)[0], + self.processor.batch_decode(output, skip_special_tokens=True)[1], ) + + @slow + @require_flash_attn + @require_torch_gpu + def test_small_model_integration_test_batch_wo_image_flashatt2(self): + model = Qwen2VLForConditionalGeneration.from_pretrained( + "Qwen/Qwen2-VL-7B-Instruct", + torch_dtype=torch.bfloat16, + attn_implementation="flash_attention_2", + device_map="auto", + ) + text = self.processor.apply_chat_template(self.messages, tokenize=False, add_generation_prompt=True) messages2 = [ - { - "role": "user", - "content": [ - { - "type": "image", - "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg", - "resized_height": 504, - "resized_width": 252, - }, - {"type": "text", "text": "What kind of dog is this?"}, - ], - } + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Who are you?"}, ] - text2, vision_infos2 = self.processor.apply_chat_template( - messages2, tokenize=False, add_generation_prompt=True + text2 = self.processor.apply_chat_template(messages2, tokenize=False, add_generation_prompt=True) + inputs = self.processor(text=[text, text2], images=[self.image], padding=True, return_tensors="pt").to( + torch_device ) - inputs = self.processor( - text=[text, text2], vision_infos=[vision_infos, vision_infos2], return_tensors="pt" - ).to(torch_device) # it should not matter whether two images are the same size or not output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?assistant\nThe dog in the picture appears to be a Labrador Retriever or a similar breed. Labradors are known for their friendly and intelligent nature,", - "system\nYou are a helpful assistant.\nuser\nWho are you?assistant\nI am a large language model created by Alibaba Cloud. I am called Qwen.", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", + "system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to answer a wide range of questions and provide information on various topics", ] + self.assertEqual( - self.processor.batch_decode(output, skip_special_tokens=True), - EXPECTED_DECODED_TEXT, + self.processor.batch_decode(output[0], skip_special_tokens=True), + EXPECTED_DECODED_TEXT[0], + ) + self.assertEqual( + self.processor.batch_decode(output, skip_special_tokens=True)[0], + self.processor.batch_decode(output, skip_special_tokens=True)[1], ) From 35f0e2d5c907b24887e9723d122a3fccf5a12299 Mon Sep 17 00:00:00 2001 From: ShuaiBai623 Date: Thu, 5 Sep 2024 17:05:54 +0800 Subject: [PATCH 3/8] [run-slow] qwen2_vl --- .../models/qwen2_vl/test_modeling_qwen2_vl.py | 28 ++++++------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py index ec6444331ed2d7..9a1938d905b11c 100644 --- a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py +++ b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py @@ -407,12 +407,8 @@ def test_small_model_integration_test_batch_wo_image(self): ] self.assertEqual( - self.processor.batch_decode(output[0], skip_special_tokens=True), - EXPECTED_DECODED_TEXT[0], - ) - self.assertEqual( - self.processor.batch_decode(output[1], skip_special_tokens=True), - EXPECTED_DECODED_TEXT[1], + self.processor.batch_decode(output, skip_special_tokens=True), + EXPECTED_DECODED_TEXT, ) @slow @@ -434,12 +430,8 @@ def test_small_model_integration_test_batch_different_resolutions(self): "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Golden Retriever. Golden Retrievers are known for their friendly and affectionate nature, as well as", ] self.assertEqual( - self.processor.batch_decode(output[0], skip_special_tokens=True), - EXPECTED_DECODED_TEXT[0], - ) - self.assertEqual( - self.processor.batch_decode(output[1], skip_special_tokens=True), - EXPECTED_DECODED_TEXT[1], + self.processor.batch_decode(output, skip_special_tokens=True), + EXPECTED_DECODED_TEXT, ) @slow @@ -466,8 +458,8 @@ def test_small_model_integration_test_batch_flashatt2(self): ] self.assertEqual( - self.processor.batch_decode(output[0], skip_special_tokens=True), - EXPECTED_DECODED_TEXT[0], + self.processor.batch_decode(output, skip_special_tokens=True), + EXPECTED_DECODED_TEXT, ) self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True)[0], @@ -503,10 +495,6 @@ def test_small_model_integration_test_batch_wo_image_flashatt2(self): ] self.assertEqual( - self.processor.batch_decode(output[0], skip_special_tokens=True), - EXPECTED_DECODED_TEXT[0], - ) - self.assertEqual( - self.processor.batch_decode(output, skip_special_tokens=True)[0], - self.processor.batch_decode(output, skip_special_tokens=True)[1], + self.processor.batch_decode(output, skip_special_tokens=True), + EXPECTED_DECODED_TEXT, ) From ba455d351dc8fd3349332d2bcfba92491a869412 Mon Sep 17 00:00:00 2001 From: ShuaiBai623 Date: Thu, 5 Sep 2024 18:53:32 +0800 Subject: [PATCH 4/8] [run-slow] qwen2_vl fix OOM --- .../models/qwen2_vl/test_modeling_qwen2_vl.py | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py index 9a1938d905b11c..13c8c904927c0e 100644 --- a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py +++ b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py @@ -27,7 +27,6 @@ is_vision_available, ) from transformers.testing_utils import ( - require_bitsandbytes, require_flash_attn, require_torch, require_torch_gpu, @@ -313,7 +312,7 @@ def setUp(self): ], } ] - url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg" + url = "https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen2-VL/demo_small.jpg" self.image = Image.open(requests.get(url, stream=True).raw) def tearDown(self): @@ -321,11 +320,9 @@ def tearDown(self): torch.cuda.empty_cache() @slow - @require_bitsandbytes def test_small_model_integration_test(self): model = Qwen2VLForConditionalGeneration.from_pretrained( - "Qwen/Qwen2-VL-7B-Instruct", - load_in_4bit=True, + "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto" ) text = self.processor.apply_chat_template(self.messages, tokenize=False, add_generation_prompt=True) @@ -336,17 +333,17 @@ def test_small_model_integration_test(self): expected_pixel_slice = torch.tensor( [ - [0.8501, 0.8501, 0.8647], - [0.9376, 0.9376, 0.9376], - [0.9084, 0.9376, 0.9376], - [1.0252, 1.0252, 1.0544], - [1.0252, 1.0252, 1.0252], - [1.0836, 1.0836, 1.0836], + [0.8792, 0.8792, 0.9084], + [1.1858, 1.1858, 1.2296], + [1.2004, 1.2004, 1.2150], + [1.4340, 1.4340, 1.4194], + [1.3902, 1.4048, 1.4194], + [1.5216, 1.5362, 1.5362], ], dtype=torch.float32, device="cpu", ) - assert torch.allclose(expected_pixel_slice, inputs.pixel_values[:6, :3], atol=1e-3) + assert torch.allclose(expected_pixel_slice, inputs.pixel_values[:6, :3], atol=3e-3) # verify generation inputs = inputs.to(torch_device) @@ -360,9 +357,10 @@ def test_small_model_integration_test(self): ) @slow - @require_bitsandbytes def test_small_model_integration_test_batch(self): - model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", load_in_4bit=True) + model = Qwen2VLForConditionalGeneration.from_pretrained( + "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto" + ) text = self.processor.apply_chat_template(self.messages, tokenize=False, add_generation_prompt=True) inputs = self.processor(text=[text, text], images=[self.image, self.image], return_tensors="pt").to( torch_device @@ -372,8 +370,8 @@ def test_small_model_integration_test_batch(self): output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?assistant\nThe dog in the picture appears to be a Labrador Retriever or a similar breed. Labradors are known for their friendly and intelligent nature,", - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?assistant\nThe dog in the image appears to be a Labrador Retriever or a similar breed. Labradors are known for their friendly and outgoing nature,", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", ] self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), @@ -385,9 +383,10 @@ def test_small_model_integration_test_batch(self): ) @slow - @require_bitsandbytes def test_small_model_integration_test_batch_wo_image(self): - model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", load_in_4bit=True) + model = Qwen2VLForConditionalGeneration.from_pretrained( + "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto" + ) text = self.processor.apply_chat_template(self.messages, tokenize=False, add_generation_prompt=True) messages2 = [ {"role": "system", "content": "You are a helpful assistant."}, @@ -412,9 +411,10 @@ def test_small_model_integration_test_batch_wo_image(self): ) @slow - @require_bitsandbytes def test_small_model_integration_test_batch_different_resolutions(self): - model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", load_in_4bit=True) + model = Qwen2VLForConditionalGeneration.from_pretrained( + "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto" + ) text = self.processor.apply_chat_template(self.messages, tokenize=False, add_generation_prompt=True) text2 = self.processor.apply_chat_template(self.messages, tokenize=False, add_generation_prompt=True) image2 = self.image.resize((224, 224)) @@ -427,7 +427,7 @@ def test_small_model_integration_test_batch_different_resolutions(self): EXPECTED_DECODED_TEXT = [ "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Golden Retriever. Golden Retrievers are known for their friendly and affectionate nature, as well as", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", ] self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), From ca2828cf600da320b6bbd6d49897810263bd48bd Mon Sep 17 00:00:00 2001 From: ShuaiBai623 Date: Thu, 5 Sep 2024 20:20:30 +0800 Subject: [PATCH 5/8] [run-slow] qwen2_vl --- .../models/qwen2_vl/test_modeling_qwen2_vl.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py index 13c8c904927c0e..6673ee0745e04c 100644 --- a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py +++ b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py @@ -349,7 +349,7 @@ def test_small_model_integration_test(self): inputs = inputs.to(torch_device) output = model.generate(**inputs, max_new_tokens=30) - EXPECTED_DECODED_TEXT = "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices" + EXPECTED_DECODED_TEXT = "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets" self.assertEqual( self.processor.decode(output[0], skip_special_tokens=True), @@ -370,8 +370,8 @@ def test_small_model_integration_test_batch(self): output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", ] self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), @@ -401,7 +401,7 @@ def test_small_model_integration_test_batch_wo_image(self): output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", "system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to answer a wide range of questions and provide information on various topics", ] @@ -426,8 +426,8 @@ def test_small_model_integration_test_batch_different_resolutions(self): output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", ] self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), @@ -453,8 +453,8 @@ def test_small_model_integration_test_batch_flashatt2(self): output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", ] self.assertEqual( @@ -490,7 +490,7 @@ def test_small_model_integration_test_batch_wo_image_flashatt2(self): output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices", + "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", "system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to answer a wide range of questions and provide information on various topics", ] From 00a141549bddcdc3190099a7c77636e817fd6002 Mon Sep 17 00:00:00 2001 From: ShuaiBai623 <43326198+ShuaiBai623@users.noreply.github.com> Date: Thu, 5 Sep 2024 23:22:38 +0800 Subject: [PATCH 6/8] Update tests/models/qwen2_vl/test_modeling_qwen2_vl.py Co-authored-by: Raushan Turganbay --- tests/models/qwen2_vl/test_modeling_qwen2_vl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py index 6673ee0745e04c..0be3608a00795f 100644 --- a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py +++ b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py @@ -404,7 +404,9 @@ def test_small_model_integration_test_batch_wo_image(self): "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", "system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to answer a wide range of questions and provide information on various topics", ] - + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets', + 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with various tasks and answer questions to the best of my' + ] # fmt: skip self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT, From 2094e397807b9e6b53089b8e7589322cc5ac12da Mon Sep 17 00:00:00 2001 From: ShuaiBai623 <43326198+ShuaiBai623@users.noreply.github.com> Date: Thu, 5 Sep 2024 23:22:55 +0800 Subject: [PATCH 7/8] Update tests/models/qwen2_vl/test_modeling_qwen2_vl.py Co-authored-by: Raushan Turganbay --- tests/models/qwen2_vl/test_modeling_qwen2_vl.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py index 0be3608a00795f..46360d939c9fe0 100644 --- a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py +++ b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py @@ -373,6 +373,9 @@ def test_small_model_integration_test_batch(self): "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", ] + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices', + 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets' + ] # fmt: skip self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT, From 661b148389d997d513d60d21bd429491b9f82a98 Mon Sep 17 00:00:00 2001 From: ShuaiBai623 Date: Thu, 5 Sep 2024 23:28:18 +0800 Subject: [PATCH 8/8] code quality --- tests/models/qwen2_vl/test_modeling_qwen2_vl.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py index 46360d939c9fe0..536e0ab54abc45 100644 --- a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py +++ b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py @@ -370,20 +370,13 @@ def test_small_model_integration_test_batch(self): output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", - ] 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular choices', 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets' - ] # fmt: skip + ] # fmt: skip self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT, ) - self.assertEqual( - self.processor.batch_decode(output, skip_special_tokens=True)[0], - self.processor.batch_decode(output, skip_special_tokens=True)[1], - ) @slow def test_small_model_integration_test_batch_wo_image(self): @@ -404,12 +397,9 @@ def test_small_model_integration_test_batch_wo_image(self): output = model.generate(**inputs, max_new_tokens=30) EXPECTED_DECODED_TEXT = [ - "system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets", - "system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to answer a wide range of questions and provide information on various topics", - ] 'system\nYou are a helpful assistant.\nuser\nWhat kind of dog is this?\nassistant\nThe dog in the picture appears to be a Labrador Retriever. Labradors are known for their friendly and intelligent nature, making them popular pets', 'system\nYou are a helpful assistant.\nuser\nWho are you?\nassistant\nI am Qwen, a large language model created by Alibaba Cloud. I am designed to assist with various tasks and answer questions to the best of my' - ] # fmt: skip + ] # fmt: skip self.assertEqual( self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT,