From 5d6a088566b743d64defbd6bc55cdf00cbbe985e Mon Sep 17 00:00:00 2001 From: sangbumchoi Date: Wed, 24 Jul 2024 12:28:55 +0000 Subject: [PATCH] fix common processor --- src/transformers/processing_utils.py | 35 +++++++++++-------- .../test_processor_grounding_dino.py | 10 +++++- tests/test_processing_common.py | 14 ++++---- 3 files changed, 36 insertions(+), 23 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 8e0ab968d8589a..83ad01714dbceb 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -736,12 +736,12 @@ def _merge_kwargs( The order of operations is as follows: 1) kwargs passed as before have highest priority to preserve BC. ```python - high_priority_kwargs = {"crop_size" = (224, 224), "padding" = "max_length"} + high_priority_kwargs = {"crop_size" = {"height": 222, "width": 222}, "padding" = "max_length"} processor(..., **high_priority_kwargs) ``` 2) kwargs passed as modality-specific kwargs have second priority. This is the recommended API. ```python - processor(..., text_kwargs={"padding": "max_length"}, images_kwargs={"crop_size": (224, 224)}}) + processor(..., text_kwargs={"padding": "max_length"}, images_kwargs={"crop_size": {"height": 222, "width": 222}}}) ``` 3) kwargs passed during instantiation of a modality processor have fourth priority. 
```python @@ -799,14 +799,20 @@ class MyProcessingKwargs(ProcessingKwargs, CommonKwargs, TextKwargs, ImagesKwarg output_kwargs.update(default_kwargs) # gather common kwargs and remove them from individual kwargs if present - common_kwargs = { - key: value - for key, value in kwargs.items() - if key not in ModelProcessorKwargs.__annotations__["text_kwargs"].__annotations__ - and key not in ModelProcessorKwargs.__annotations__["images_kwargs"].__annotations__ - and key not in ModelProcessorKwargs.__annotations__["audio_kwargs"].__annotations__ - and key not in ModelProcessorKwargs.__annotations__["videos_kwargs"].__annotations__ - } + common_kwargs = {} + for key, value in kwargs.items(): + if key == "common_kwargs": + for common_key, common_value in value.items(): + common_kwargs[common_key] = common_value + elif key in ["text_kwargs", "images_kwargs", "audio_kwargs", "videos_kwargs"]: + pass + elif ( + key not in ModelProcessorKwargs.__annotations__["text_kwargs"].__annotations__ + and key not in ModelProcessorKwargs.__annotations__["images_kwargs"].__annotations__ + and key not in ModelProcessorKwargs.__annotations__["audio_kwargs"].__annotations__ + and key not in ModelProcessorKwargs.__annotations__["videos_kwargs"].__annotations__ + ): + common_kwargs[key] = value # ensure common kwargs are propagated to all relevant modalities for key, value in common_kwargs.items(): @@ -820,10 +826,10 @@ class MyProcessingKwargs(ProcessingKwargs, CommonKwargs, TextKwargs, ImagesKwarg # update modality kwargs with passed kwargs for modality in output_kwargs: for modality_key in ModelProcessorKwargs.__annotations__[modality].__annotations__.keys(): - if modality in kwargs and modality_key in kwargs[modality]: - output_kwargs[modality][modality_key] = kwargs[modality][modality_key] - elif modality_key in kwargs: + if modality_key in kwargs: output_kwargs[modality][modality_key] = kwargs[modality_key] + elif modality in kwargs and modality_key in kwargs[modality]: + 
output_kwargs[modality][modality_key] = kwargs[modality][modality_key] return output_kwargs @classmethod @@ -988,5 +994,5 @@ def apply_chat_template( ProcessorMixin.push_to_hub = copy_func(ProcessorMixin.push_to_hub) if ProcessorMixin.push_to_hub.__doc__ is not None: ProcessorMixin.push_to_hub.__doc__ = ProcessorMixin.push_to_hub.__doc__.format( - object="processor", object_class="AutoProcessor", object_files="processor files" - ) + object="processor", object_class="AutoProcessor", object_files="processor files" + ) diff --git a/tests/models/grounding_dino/test_processor_grounding_dino.py b/tests/models/grounding_dino/test_processor_grounding_dino.py index b7a259f0c31526..448aa8f7fb6433 100644 --- a/tests/models/grounding_dino/test_processor_grounding_dino.py +++ b/tests/models/grounding_dino/test_processor_grounding_dino.py @@ -43,6 +43,7 @@ @require_torch @require_vision class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase): + from_pretrained_id = "IDEA-Research/grounding-dino-base" processor_class = GroundingDinoProcessor def setUp(self): @@ -67,6 +68,13 @@ def setUp(self): with open(self.image_processor_file, "w", encoding="utf-8") as fp: json.dump(image_processor_map, fp) + image_processor = GroundingDinoImageProcessor() + tokenizer = BertTokenizer.from_pretrained(self.from_pretrained_id) + + processor = GroundingDinoProcessor(image_processor, tokenizer) + + processor.save_pretrained(self.tmpdirname) + self.batch_size = 7 self.num_queries = 5 self.embed_dim = 5 @@ -281,4 +289,4 @@ def test_unstructured_kwargs_batched(self): ) self.assertEqual(inputs["pixel_values"].shape[2], 214) - self.assertEqual(len(inputs["input_ids"][0]), 11) + self.assertEqual(len(inputs["input_ids"][0]), 6) diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index e6128cde9bb503..b43d48e530b8ce 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -277,13 +277,13 @@ def test_doubly_passed_kwargs(self): 
input_str = ["lower newer"] image_input = self.prepare_image_inputs() - with self.assertRaises(ValueError): - _ = processor( - text=input_str, - images=image_input, - images_kwargs={"crop_size": {"height": 222, "width": 222}}, - crop_size={"height": 214, "width": 214}, - ) + inputs = processor( + text=input_str, + images=image_input, + images_kwargs={"size": {"height": 222, "width": 222}}, + size={"height": 35, "width": 35}, + ) + self.assertEqual(inputs["pixel_values"][0].shape[2], 35) @require_torch @require_vision