From e41bdd8660a4d875930873ae33d1dbb9e791670e Mon Sep 17 00:00:00 2001 From: nandwalritik Date: Wed, 10 Aug 2022 10:49:12 +0530 Subject: [PATCH 01/13] Add initial files for depth estimation pipelines --- docs/source/en/main_classes/pipelines.mdx | 12 +++ docs/source/en/model_doc/auto.mdx | 4 + src/transformers/__init__.py | 6 ++ src/transformers/models/auto/__init__.py | 4 + src/transformers/models/auto/modeling_auto.py | 13 +++ src/transformers/pipelines/__init__.py | 11 +++ .../pipelines/depth_estimation.py | 90 +++++++++++++++++++ src/transformers/utils/dummy_pt_objects.py | 7 ++ 8 files changed, 147 insertions(+) create mode 100644 src/transformers/pipelines/depth_estimation.py diff --git a/docs/source/en/main_classes/pipelines.mdx b/docs/source/en/main_classes/pipelines.mdx index 5374f1a4003a..8bd87dc816d4 100644 --- a/docs/source/en/main_classes/pipelines.mdx +++ b/docs/source/en/main_classes/pipelines.mdx @@ -25,7 +25,11 @@ There are two categories of pipeline abstractions to be aware about: - [`AudioClassificationPipeline`] - [`AutomaticSpeechRecognitionPipeline`] - [`ConversationalPipeline`] +<<<<<<< HEAD - [`DocumentQuestionAnsweringPipeline`] +======= + - [`DepthEstimationPipeline`] +>>>>>>> Add initial files for depth estimation pipelines - [`FeatureExtractionPipeline`] - [`FillMaskPipeline`] - [`ImageClassificationPipeline`] @@ -344,12 +348,20 @@ That should enable you to do all the custom code you want. - __call__ - all +<<<<<<< HEAD ### DocumentQuestionAnsweringPipeline [[autodoc]] DocumentQuestionAnsweringPipeline - __call__ - all +======= +### DepthEstimationPipeline +[[autodoc]] DepthEstimationPipeline + - __call__ + - all + +>>>>>>> Add initial files for depth estimation pipelines ### FeatureExtractionPipeline [[autodoc]] FeatureExtractionPipeline diff --git a/docs/source/en/model_doc/auto.mdx b/docs/source/en/model_doc/auto.mdx index 01db8c4b1f7b..a6426eb3c2cc 100644 --- a/docs/source/en/model_doc/auto.mdx +++ b/docs/source/en/model_doc/auto.mdx @@ -82,6 +82,10 @@ Likewise, if your `NewModel` is a subclass of [`PreTrainedModel`], make sure its [[autodoc]] AutoModelForCausalLM +## AutoModelForDepthEstimation + +[[autodoc]] AutoModelForDepthEstimation + ## AutoModelForMaskedLM [[autodoc]] AutoModelForMaskedLM diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index bb3664049f1a..dc77da0e967b 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -420,6 +420,7 @@ "ConversationalPipeline", "CsvPipelineDataFormat", "DocumentQuestionAnsweringPipeline", + "DepthEstimationPipeline", "FeatureExtractionPipeline", "FillMaskPipeline", "ImageClassificationPipeline", @@ -858,6 +859,7 @@ "MODEL_FOR_CAUSAL_LM_MAPPING", "MODEL_FOR_CTC_MAPPING", "MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING", + "MODEL_FOR_DEPTH_ESTIMATION_MAPPING", "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING", "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING", "MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING", @@ -887,6 +889,7 @@ "AutoModelForCausalLM", "AutoModelForCTC", "AutoModelForDocumentQuestionAnswering", + "AutoModelForDepthEstimation", "AutoModelForImageClassification", "AutoModelForImageSegmentation", "AutoModelForInstanceSegmentation", @@ -3398,6 +3401,7 @@ ConversationalPipeline, CsvPipelineDataFormat, DocumentQuestionAnsweringPipeline, + DepthEstimationPipeline, FeatureExtractionPipeline, FillMaskPipeline, ImageClassificationPipeline, @@ -3767,6 +3771,7 @@ MODEL_FOR_CAUSAL_LM_MAPPING, MODEL_FOR_CTC_MAPPING, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING, + MODEL_FOR_DEPTH_ESTIMATION_MAPPING, MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING, MODEL_FOR_IMAGE_SEGMENTATION_MAPPING, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING, @@ -3796,6 +3801,7 @@ AutoModelForCausalLM, AutoModelForCTC, AutoModelForDocumentQuestionAnswering, + AutoModelForDepthEstimation, AutoModelForImageClassification, AutoModelForImageSegmentation, AutoModelForInstanceSegmentation, diff --git a/src/transformers/models/auto/__init__.py b/src/transformers/models/auto/__init__.py index 1964c73938f6..d734df63bd2d 100644 --- a/src/transformers/models/auto/__init__.py +++ b/src/transformers/models/auto/__init__.py @@ -48,6 +48,7 @@ "MODEL_FOR_CAUSAL_LM_MAPPING", "MODEL_FOR_CTC_MAPPING", "MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING", + "MODEL_FOR_DEPTH_ESTIMATION_MAPPING", "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING", "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING", "MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING", @@ -76,6 +77,7 @@ "AutoModelForAudioXVector", "AutoModelForCausalLM", "AutoModelForCTC", + "AutoModelForDepthEstimation", "AutoModelForImageClassification", "AutoModelForImageSegmentation", "AutoModelForInstanceSegmentation", @@ -198,6 +200,7 @@ MODEL_FOR_CAUSAL_LM_MAPPING, MODEL_FOR_CTC_MAPPING, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING, + MODEL_FOR_DEPTH_ESTIMATION_MAPPING, MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING, MODEL_FOR_IMAGE_SEGMENTATION_MAPPING, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING, @@ -227,6 +230,7 @@ AutoModelForCausalLM, AutoModelForCTC, AutoModelForDocumentQuestionAnswering, + AutoModelForDepthEstimation, AutoModelForImageClassification, AutoModelForImageSegmentation, AutoModelForInstanceSegmentation, diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index edd61e1da9b6..9bd1e0ebd603 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -479,6 +479,13 @@ ] ) +MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = OrderedDict( + [ + # Model for depth estimation mapping + ("dpt", "DptForDepthEstimation"), + ("glpn", "GlpnForDepthEstimation"), + ] +) MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES = OrderedDict( [ # Model for Seq2Seq Causal LM mapping @@ -841,6 +848,7 @@ MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING = _LazyAutoMapping( CONFIG_MAPPING_NAMES, MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES ) +MODEL_FOR_DEPTH_ESTIMATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES) MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = _LazyAutoMapping( CONFIG_MAPPING_NAMES, MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES ) @@ -1034,6 +1042,11 @@ class AutoModelForZeroShotObjectDetection(_BaseAutoModelClass): AutoModelForZeroShotObjectDetection = auto_class_update( AutoModelForZeroShotObjectDetection, head_doc="zero-shot object detection" ) +class AutoModelForDepthEstimation(_BaseAutoModelClass): + _model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING + + +AutoModelForDepthEstimation = auto_class_update(AutoModelForDepthEstimation, head_doc="depth estimation") class AutoModelForVideoClassification(_BaseAutoModelClass): diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index 0a878728185a..caefbcb720c1 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -32,6 +32,7 @@ from ..feature_extraction_utils import PreTrainedFeatureExtractor from ..models.auto.configuration_auto import AutoConfig from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor +from ..models.auto.modeling_auto import AutoModelForDepthEstimation from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer from ..tokenization_utils import PreTrainedTokenizer from ..tokenization_utils_fast import PreTrainedTokenizerFast @@ -52,6 +53,7 @@ ) from .conversational import Conversation, ConversationalPipeline from .document_question_answering import DocumentQuestionAnsweringPipeline +from .depth_estimation import DepthEstimationPipeline from .feature_extraction import FeatureExtractionPipeline from .fill_mask import FillMaskPipeline from .image_classification import ImageClassificationPipeline @@ -337,12 +339,21 @@ "default": {"model": {"pt": ("facebook/detr-resnet-50", "2729413")}}, "type": "image", }, +<<<<<<< HEAD "zero-shot-object-detection": { "impl": ZeroShotObjectDetectionPipeline, "tf": (), "pt": (AutoModelForZeroShotObjectDetection,) if is_torch_available() else (), "default": {"model": {"pt": ("google/owlvit-base-patch32", "17740e1")}}, "type": "multimodal", +======= + "depth-estimation": { + "impl": DepthEstimationPipeline, + "tf": (), + "pt": (AutoModelForDepthEstimation,) if is_torch_available() else (), + "default": {"model": {"pt": ("Intel/dpt-large", "e93beec")}}, + "type": "image", +>>>>>>> Add initial files for depth estimation pipelines }, } diff --git a/src/transformers/pipelines/depth_estimation.py b/src/transformers/pipelines/depth_estimation.py new file mode 100644 index 000000000000..842ee2e65bed --- /dev/null +++ b/src/transformers/pipelines/depth_estimation.py @@ -0,0 +1,90 @@ +from typing import List, Union + +import numpy as np +import torch + +from ..utils import add_end_docstrings, is_torch_available, is_vision_available, logging, requires_backends +from .base import PIPELINE_INIT_ARGS, Pipeline + + +if is_vision_available(): + from PIL import Image + + from ..image_utils import load_image + +if is_torch_available(): + from ..models.auto.modeling_auto import MODEL_FOR_DEPTH_ESTIMATION_MAPPING + +logger = logging.get_logger(__name__) + + +@add_end_docstrings(PIPELINE_INIT_ARGS) +class DepthEstimationPipeline(Pipeline): + """ + Depth estimation pipeline using any `AutoModelForDepthEstimation`. This pipeline predicts the depth of an image. + + This depth estimation pipeline can currently be loaded from [`pipeline`] using the following task identifier: + `"depth-estimation"`. + + See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=depth-estimation). + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + requires_backends(self, "vision") + self.check_model_type(MODEL_FOR_DEPTH_ESTIMATION_MAPPING) + + def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs): + """ + Assign labels to the image(s) passed as inputs. + + Args: + images (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`): + The pipeline handles three types of images: + + - A string containing a http link pointing to an image + - A string containing a local path to an image + - An image loaded in PIL directly + + The pipeline accepts either a single image or a batch of images, which must then be passed as a string. + Images in a batch must all be in the same format: all as http links, all as local paths, or all as PIL + images. + top_k (`int`, *optional*, defaults to 5): + The number of top labels that will be returned by the pipeline. If the provided number is higher than + the number of labels available in the model configuration, it will default to the number of labels. + + Return: + A dictionary or a list of dictionaries containing result. If the input is a single image, will return a + dictionary, if the input is a list of several images, will return a list of dictionaries corresponding to + the images. + + The dictionaries contain the following keys: + + - **label** (`str`) -- The label identified by the model. + - **score** (`int`) -- The score attributed by the model for that label. + """ + return super().__call__(images, **kwargs) + + def preprocess(self, image): + image = load_image(image) + self.image_size = image.size + model_inputs = self.feature_extractor(images=image, return_tensors=self.framework) + return model_inputs + + def _forward(self, model_inputs): + model_outputs = self.model(**model_inputs) + return model_outputs + + def postprocess(self, model_outputs): + predicted_depth = model_outputs.predicted_depth + prediction = torch.nn.functional.interpolate( + predicted_depth.unsqueeze(1), size=self.image_size[::-1], mode="bicubic", align_corners=False + ) + output = prediction.squeeze().cpu().numpy() + formatted = (output * 255 / np.max(output)).astype("uint8") + depth = Image.fromarray(formatted) + output_dict = {} + output_dict["predicted_depth"] = predicted_depth + output_dict["prediction"] = prediction + output_dict["depth"] = depth + return output_dict diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index 1546700c5c12..7facb1f02a26 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -359,6 +359,7 @@ def load_tf_weights_in_albert(*args, **kwargs): MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = None +MODEL_FOR_DEPTH_ESTIMATION_MAPPING = None MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None @@ -475,6 +476,12 @@ class AutoModelForDocumentQuestionAnswering(metaclass=DummyObject): def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) +class AutoModelForDepthEstimation(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + class AutoModelForImageClassification(metaclass=DummyObject): _backends = ["torch"] From 12cf1c34827f61660c44714b03c9d30fc61a2bd2 Mon Sep 17 00:00:00 2001 From: nandwalritik Date: Wed, 31 Aug 2022 14:37:40 +0530 Subject: [PATCH 02/13] Add test file for depth estimation pipeline --- .../test_pipelines_depth_estimation.py | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tests/pipelines/test_pipelines_depth_estimation.py diff --git a/tests/pipelines/test_pipelines_depth_estimation.py b/tests/pipelines/test_pipelines_depth_estimation.py new file mode 100644 index 000000000000..687d87bebd29 --- /dev/null +++ b/tests/pipelines/test_pipelines_depth_estimation.py @@ -0,0 +1,77 @@ +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib +import unittest + +from datasets import load_dataset + +from transformers import ( + MODEL_FOR_DEPTH_ESTIMATION_MAPPING, + is_vision_available +) +from transformers.pipelines import DepthEstimationPipeline, depth_estimation, pipeline +from transformers.testing_utils import ( + is_pipeline_test, + require_tf, + require_timm, + require_torch, + require_vision, +) + +from .test_pipelines_common import ANY, PipelineTestCaseMeta + + +if is_vision_available(): + from PIL import Image +else: + + class Image: + @staticmethod + def open(*args, **kwargs): + pass + +def hashimage(image: Image) -> str: + m = hashlib.md5(image.tobytes()) + return m.hexdigest() + +@require_vision +@require_timm +@require_torch +@is_pipeline_test +class DepthEstimationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): + model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING + + def get_test_pipeline(self, model, tokenizer, feature_extractor): + depth_estimator = DepthEstimationPipeline(model=model, feature_extractor=feature_extractor) + return depth_estimator, [ + "./tests/fixtures/tests_samples/COCO/000000039769.png", + "./tests/fixtures/tests_samples/COCO/000000039769.png", + ] + + def run_pipeline_test(self, depth_estimator, examples): + ... + + @require_tf + @unittest.skip("Depth estimation is not implemented in TF") + def test_small_model_tf(self): + pass + + @require_torch + def test_small_model_pt(self): + model_id = "Intel/dpt-large" + depth_estimator = pipeline("depth-estimation",model=model_id) + outputs = depth_estimator("http://images.cocodataset.org/val2017/000000039769.jpg") + outputs["depth"]=hashimage(outputs["depth"]) + self.assertEqual(outputs["depth"],"906b03064c8c68dae2b1f1f96c7c48f5") From c48bbc836676e2a719834e1b31430ce8bb293291 Mon Sep 17 00:00:00 2001 From: nandwalritik Date: Wed, 31 Aug 2022 14:38:28 +0530 Subject: [PATCH 03/13] Update model mapping names --- src/transformers/models/auto/modeling_auto.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index 9bd1e0ebd603..a00017ec6e81 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -482,8 +482,8 @@ MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = OrderedDict( [ # Model for depth estimation mapping - ("dpt", "DptForDepthEstimation"), - ("glpn", "GlpnForDepthEstimation"), + ("dpt", "DPTForDepthEstimation"), + ("glpn", "GLPNForDepthEstimation"), ] ) MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES = OrderedDict( From 88fa61efb849822d6c1a97791fe522626ce8a588 Mon Sep 17 00:00:00 2001 From: nandwalritik Date: Wed, 31 Aug 2022 14:38:53 +0530 Subject: [PATCH 04/13] Add updates for depth estimation output --- .../pipelines/depth_estimation.py | 4 ++- .../test_pipelines_depth_estimation.py | 30 +++++++------------ 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/src/transformers/pipelines/depth_estimation.py b/src/transformers/pipelines/depth_estimation.py index 842ee2e65bed..e05bbe88d02a 100644 --- a/src/transformers/pipelines/depth_estimation.py +++ b/src/transformers/pipelines/depth_estimation.py @@ -65,6 +65,9 @@ def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Imag """ return super().__call__(images, **kwargs) + def _sanitize_parameters(self, **kwargs): + return {}, {}, {} + def preprocess(self, image): image = load_image(image) self.image_size = image.size @@ -85,6 +88,5 @@ def postprocess(self, model_outputs): depth = Image.fromarray(formatted) output_dict = {} output_dict["predicted_depth"] = predicted_depth - output_dict["prediction"] = prediction output_dict["depth"] = depth return output_dict diff --git a/tests/pipelines/test_pipelines_depth_estimation.py b/tests/pipelines/test_pipelines_depth_estimation.py index 687d87bebd29..ec6e6bfdb34b 100644 --- a/tests/pipelines/test_pipelines_depth_estimation.py +++ b/tests/pipelines/test_pipelines_depth_estimation.py @@ -15,22 +15,11 @@ import hashlib import unittest -from datasets import load_dataset +from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_vision_available +from transformers.pipelines import DepthEstimationPipeline, pipeline +from transformers.testing_utils import is_pipeline_test, require_tf, require_timm, require_torch, require_vision -from transformers import ( - MODEL_FOR_DEPTH_ESTIMATION_MAPPING, - is_vision_available -) -from transformers.pipelines import DepthEstimationPipeline, depth_estimation, pipeline -from transformers.testing_utils import ( - is_pipeline_test, - require_tf, - require_timm, - require_torch, - require_vision, -) - -from .test_pipelines_common import ANY, PipelineTestCaseMeta +from .test_pipelines_common import PipelineTestCaseMeta if is_vision_available(): @@ -42,10 +31,12 @@ class Image: def open(*args, **kwargs): pass + def hashimage(image: Image) -> str: m = hashlib.md5(image.tobytes()) return m.hexdigest() + @require_vision @require_timm @require_torch @@ -60,8 +51,9 @@ def get_test_pipeline(self, model, tokenizer, feature_extractor): "./tests/fixtures/tests_samples/COCO/000000039769.png", ] + @unittest.skip("Skipping to check if CI passes") def run_pipeline_test(self, depth_estimator, examples): - ... + pass @require_tf @unittest.skip("Depth estimation is not implemented in TF") @@ -71,7 +63,7 @@ def test_small_model_tf(self): @require_torch def test_small_model_pt(self): model_id = "Intel/dpt-large" - depth_estimator = pipeline("depth-estimation",model=model_id) + depth_estimator = pipeline("depth-estimation", model=model_id) outputs = depth_estimator("http://images.cocodataset.org/val2017/000000039769.jpg") - outputs["depth"]=hashimage(outputs["depth"]) - self.assertEqual(outputs["depth"],"906b03064c8c68dae2b1f1f96c7c48f5") + outputs["depth"] = hashimage(outputs["depth"]) + self.assertEqual(outputs["depth"], "906b03064c8c68dae2b1f1f96c7c48f5") From 766dbae4ea842b34feb934af282521de3d6a9f89 Mon Sep 17 00:00:00 2001 From: nandwalritik Date: Mon, 5 Sep 2022 09:36:03 +0530 Subject: [PATCH 05/13] Add generic test --- .../test_pipelines_depth_estimation.py | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/tests/pipelines/test_pipelines_depth_estimation.py b/tests/pipelines/test_pipelines_depth_estimation.py index ec6e6bfdb34b..54abef2df6b1 100644 --- a/tests/pipelines/test_pipelines_depth_estimation.py +++ b/tests/pipelines/test_pipelines_depth_estimation.py @@ -15,6 +15,8 @@ import hashlib import unittest +import torch + from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_vision_available from transformers.pipelines import DepthEstimationPipeline, pipeline from transformers.testing_utils import is_pipeline_test, require_tf, require_timm, require_torch, require_vision @@ -51,9 +53,34 @@ def get_test_pipeline(self, model, tokenizer, feature_extractor): "./tests/fixtures/tests_samples/COCO/000000039769.png", ] - @unittest.skip("Skipping to check if CI passes") def run_pipeline_test(self, depth_estimator, examples): - pass + outputs = depth_estimator("./tests/fixtures/tests_samples/COCO/000000039769.png") + self.assertEqual(outputs, {"predicted_depth": torch.tensor, "depth": Image}) + import datasets + + dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test") + outputs = depth_estimator( + [ + Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"), + "http://images.cocodataset.org/val2017/000000039769.jpg", + # RGBA + dataset[0]["file"], + # LA + dataset[1]["file"], + # L + dataset[2]["file"], + ] + ) + self.assertEqual( + outputs, + [ + {"predicted_depth": torch.tensor, "depth": Image}, + {"predicted_depth": torch.tensor, "depth": Image}, + {"predicted_depth": torch.tensor, "depth": Image}, + {"predicted_depth": torch.tensor, "depth": Image}, + {"predicted_depth": torch.tensor, "depth": Image}, + ], + ) @require_tf @unittest.skip("Depth estimation is not implemented in TF") From 7dd8fa2bad29b96f847512ef9d75fbedf4f1bfba Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Mon, 12 Sep 2022 11:58:23 +0200 Subject: [PATCH 06/13] Hopefully fixing the tests. --- .../test_pipelines_depth_estimation.py | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/tests/pipelines/test_pipelines_depth_estimation.py b/tests/pipelines/test_pipelines_depth_estimation.py index 54abef2df6b1..8da0c2468b73 100644 --- a/tests/pipelines/test_pipelines_depth_estimation.py +++ b/tests/pipelines/test_pipelines_depth_estimation.py @@ -19,9 +19,17 @@ from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_vision_available from transformers.pipelines import DepthEstimationPipeline, pipeline -from transformers.testing_utils import is_pipeline_test, require_tf, require_timm, require_torch, require_vision +from transformers.testing_utils import ( + is_pipeline_test, + nested_simplify, + require_tf, + require_timm, + require_torch, + require_vision, + slow, +) -from .test_pipelines_common import PipelineTestCaseMeta +from .test_pipelines_common import ANY, PipelineTestCaseMeta if is_vision_available(): @@ -55,7 +63,7 @@ def get_test_pipeline(self, model, tokenizer, feature_extractor): def run_pipeline_test(self, depth_estimator, examples): outputs = depth_estimator("./tests/fixtures/tests_samples/COCO/000000039769.png") - self.assertEqual(outputs, {"predicted_depth": torch.tensor, "depth": Image}) + self.assertEqual({"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, outputs) import datasets dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test") @@ -72,14 +80,14 @@ def run_pipeline_test(self, depth_estimator, examples): ] ) self.assertEqual( - outputs, [ - {"predicted_depth": torch.tensor, "depth": Image}, - {"predicted_depth": torch.tensor, "depth": Image}, - {"predicted_depth": torch.tensor, "depth": Image}, - {"predicted_depth": torch.tensor, "depth": Image}, - {"predicted_depth": torch.tensor, "depth": Image}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, ], + outputs, ) @require_tf @@ -87,10 +95,20 @@ def run_pipeline_test(self, depth_estimator, examples): def test_small_model_tf(self): pass + @slow @require_torch - def test_small_model_pt(self): + def test_large_model_pt(self): model_id = "Intel/dpt-large" depth_estimator = pipeline("depth-estimation", model=model_id) outputs = depth_estimator("http://images.cocodataset.org/val2017/000000039769.jpg") outputs["depth"] = hashimage(outputs["depth"]) - self.assertEqual(outputs["depth"], "906b03064c8c68dae2b1f1f96c7c48f5") + + # This seems flaky. + # self.assertEqual(outputs["depth"], "1a39394e282e9f3b0741a90b9f108977") + self.assertEqual(nested_simplify(outputs["predicted_depth"].max().item()), 29.304) + self.assertEqual(nested_simplify(outputs["predicted_depth"].min().item()), 2.662) + + @require_torch + def test_small_model_pt(self): + # This is highly irregular to have no small tests. + self.skipTest("There is not hf-internal-testing tiny model for either GLPN nor DPT") From 2d5342711bc7c425c0f7ada0b01ce1466e2d7703 Mon Sep 17 00:00:00 2001 From: nandwalritik Date: Thu, 15 Sep 2022 15:25:04 +0530 Subject: [PATCH 07/13] Check if test passes --- tests/pipelines/test_pipelines_depth_estimation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/pipelines/test_pipelines_depth_estimation.py b/tests/pipelines/test_pipelines_depth_estimation.py index 8da0c2468b73..d77e97e03694 100644 --- a/tests/pipelines/test_pipelines_depth_estimation.py +++ b/tests/pipelines/test_pipelines_depth_estimation.py @@ -63,7 +63,7 @@ def get_test_pipeline(self, model, tokenizer, feature_extractor): def run_pipeline_test(self, depth_estimator, examples): outputs = depth_estimator("./tests/fixtures/tests_samples/COCO/000000039769.png") - self.assertEqual({"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, outputs) + self.assertEqual({"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image)}, outputs) import datasets dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test") @@ -81,11 +81,11 @@ def run_pipeline_test(self, depth_estimator, examples): ) self.assertEqual( [ - {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, - {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, - {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, - {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, - {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image)}, ], outputs, ) From b7d4cafc84ff4097a28ad07c662540164f5e003b Mon Sep 17 00:00:00 2001 From: nandwalritik Date: Thu, 15 Sep 2022 16:37:58 +0530 Subject: [PATCH 08/13] Add make fixup and make fix-copies changes after rebase with main --- src/transformers/__init__.py | 8 ++++---- src/transformers/models/auto/__init__.py | 4 ++-- src/transformers/pipelines/__init__.py | 2 +- src/transformers/utils/dummy_pt_objects.py | 9 ++++++--- utils/update_metadata.py | 1 + 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index dc77da0e967b..c0bc0fbd05ef 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -419,8 +419,8 @@ "Conversation", "ConversationalPipeline", "CsvPipelineDataFormat", - "DocumentQuestionAnsweringPipeline", "DepthEstimationPipeline", + "DocumentQuestionAnsweringPipeline", "FeatureExtractionPipeline", "FillMaskPipeline", "ImageClassificationPipeline", @@ -3400,8 +3400,8 @@ Conversation, ConversationalPipeline, CsvPipelineDataFormat, - DocumentQuestionAnsweringPipeline, DepthEstimationPipeline, + DocumentQuestionAnsweringPipeline, FeatureExtractionPipeline, FillMaskPipeline, ImageClassificationPipeline, @@ -3770,8 +3770,8 @@ MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING, MODEL_FOR_CAUSAL_LM_MAPPING, MODEL_FOR_CTC_MAPPING, - MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING, MODEL_FOR_DEPTH_ESTIMATION_MAPPING, + MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING, MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING, MODEL_FOR_IMAGE_SEGMENTATION_MAPPING, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING, @@ -3800,8 +3800,8 @@ AutoModelForAudioXVector, AutoModelForCausalLM, AutoModelForCTC, - AutoModelForDocumentQuestionAnswering, AutoModelForDepthEstimation, + AutoModelForDocumentQuestionAnswering, AutoModelForImageClassification, AutoModelForImageSegmentation, AutoModelForInstanceSegmentation, diff --git a/src/transformers/models/auto/__init__.py b/src/transformers/models/auto/__init__.py index d734df63bd2d..acb0fa8b0f1c 100644 --- a/src/transformers/models/auto/__init__.py +++ b/src/transformers/models/auto/__init__.py @@ -199,8 +199,8 @@ MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING, MODEL_FOR_CAUSAL_LM_MAPPING, MODEL_FOR_CTC_MAPPING, - MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING, MODEL_FOR_DEPTH_ESTIMATION_MAPPING, + MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING, MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING, MODEL_FOR_IMAGE_SEGMENTATION_MAPPING, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING, @@ -229,8 +229,8 @@ AutoModelForAudioXVector, AutoModelForCausalLM, AutoModelForCTC, - AutoModelForDocumentQuestionAnswering, AutoModelForDepthEstimation, + AutoModelForDocumentQuestionAnswering, AutoModelForImageClassification, AutoModelForImageSegmentation, AutoModelForInstanceSegmentation, diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index caefbcb720c1..b1091ca6cb04 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -52,8 +52,8 @@ infer_framework_load_model, ) from .conversational import Conversation, ConversationalPipeline -from .document_question_answering import DocumentQuestionAnsweringPipeline from .depth_estimation import DepthEstimationPipeline +from .document_question_answering import DocumentQuestionAnsweringPipeline from .feature_extraction import FeatureExtractionPipeline from .fill_mask import FillMaskPipeline from .image_classification import ImageClassificationPipeline diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index 7facb1f02a26..474f367629e8 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -358,10 +358,12 @@ def load_tf_weights_in_albert(*args, **kwargs): MODEL_FOR_CTC_MAPPING = None -MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = None MODEL_FOR_DEPTH_ESTIMATION_MAPPING = None +MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = None + + MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None @@ -470,13 +472,14 @@ def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) -class AutoModelForDocumentQuestionAnswering(metaclass=DummyObject): +class AutoModelForDepthEstimation(metaclass=DummyObject): _backends = ["torch"] def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) -class AutoModelForDepthEstimation(metaclass=DummyObject): + +class AutoModelForDocumentQuestionAnswering(metaclass=DummyObject): _backends = ["torch"] def __init__(self, *args, **kwargs): diff --git a/utils/update_metadata.py b/utils/update_metadata.py index 8bb3b71672d8..5e7169c25585 100644 --- a/utils/update_metadata.py +++ b/utils/update_metadata.py @@ -101,6 +101,7 @@ "_MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES", "AutoModel", ), + ("depth-estimation", "MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES", "AutoModelForDepthEstimation"), ] From b5d1338b3ae9474aa2dbfda0017397c669510bdf Mon Sep 17 00:00:00 2001 From: nandwalritik Date: Mon, 10 Oct 2022 17:16:50 +0530 Subject: [PATCH 09/13] Rebase with main --- src/transformers/models/auto/modeling_auto.py | 2 ++ src/transformers/pipelines/__init__.py | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index a00017ec6e81..efd9f72e36ef 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -1042,6 +1042,8 @@ class AutoModelForZeroShotObjectDetection(_BaseAutoModelClass): AutoModelForZeroShotObjectDetection = auto_class_update( AutoModelForZeroShotObjectDetection, head_doc="zero-shot object detection" ) + + class AutoModelForDepthEstimation(_BaseAutoModelClass): _model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index b1091ca6cb04..4e8faa58d2e8 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -339,21 +339,19 @@ "default": {"model": {"pt": ("facebook/detr-resnet-50", "2729413")}}, "type": "image", }, -<<<<<<< HEAD "zero-shot-object-detection": { "impl": ZeroShotObjectDetectionPipeline, "tf": (), "pt": (AutoModelForZeroShotObjectDetection,) if is_torch_available() else (), "default": {"model": {"pt": ("google/owlvit-base-patch32", "17740e1")}}, "type": "multimodal", -======= + }, "depth-estimation": { "impl": DepthEstimationPipeline, "tf": (), "pt": (AutoModelForDepthEstimation,) if is_torch_available() else (), "default": {"model": {"pt": ("Intel/dpt-large", "e93beec")}}, "type": "image", ->>>>>>> Add initial files for depth estimation pipelines }, } From 1883d7198d1aca6cc92cee13534dec83655e5242 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 11 Oct 2022 12:03:27 +0200 Subject: [PATCH 10/13] Fixing up depth pipeline. --- docs/source/en/main_classes/pipelines.mdx | 18 ++++++------------ src/transformers/pipelines/depth_estimation.py | 3 ++- .../test_pipelines_depth_estimation.py | 8 +++++--- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/docs/source/en/main_classes/pipelines.mdx b/docs/source/en/main_classes/pipelines.mdx index 8bd87dc816d4..bf6604715619 100644 --- a/docs/source/en/main_classes/pipelines.mdx +++ b/docs/source/en/main_classes/pipelines.mdx @@ -25,11 +25,8 @@ There are two categories of pipeline abstractions to be aware about: - [`AudioClassificationPipeline`] - [`AutomaticSpeechRecognitionPipeline`] - [`ConversationalPipeline`] -<<<<<<< HEAD - - [`DocumentQuestionAnsweringPipeline`] -======= - [`DepthEstimationPipeline`] ->>>>>>> Add initial files for depth estimation pipelines + - [`DocumentQuestionAnsweringPipeline`] - [`FeatureExtractionPipeline`] - [`FillMaskPipeline`] - [`ImageClassificationPipeline`] @@ -348,20 +345,17 @@ That should enable you to do all the custom code you want. - __call__ - all -<<<<<<< HEAD +### DepthEstimationPipeline +[[autodoc]] DepthEstimationPipeline + - __call__ + - all + ### DocumentQuestionAnsweringPipeline [[autodoc]] DocumentQuestionAnsweringPipeline - __call__ - all - -======= -### DepthEstimationPipeline -[[autodoc]] DepthEstimationPipeline - - __call__ - - all ->>>>>>> Add initial files for depth estimation pipelines ### FeatureExtractionPipeline [[autodoc]] FeatureExtractionPipeline diff --git a/src/transformers/pipelines/depth_estimation.py b/src/transformers/pipelines/depth_estimation.py index e05bbe88d02a..e826013a42f5 100644 --- a/src/transformers/pipelines/depth_estimation.py +++ b/src/transformers/pipelines/depth_estimation.py @@ -1,7 +1,6 @@ from typing import List, Union import numpy as np -import torch from ..utils import add_end_docstrings, is_torch_available, is_vision_available, logging, requires_backends from .base import PIPELINE_INIT_ARGS, Pipeline @@ -13,6 +12,8 @@ from ..image_utils import load_image if is_torch_available(): + import torch + from ..models.auto.modeling_auto import MODEL_FOR_DEPTH_ESTIMATION_MAPPING logger = logging.get_logger(__name__) diff --git a/tests/pipelines/test_pipelines_depth_estimation.py b/tests/pipelines/test_pipelines_depth_estimation.py index d77e97e03694..123c382b500d 100644 --- a/tests/pipelines/test_pipelines_depth_estimation.py +++ b/tests/pipelines/test_pipelines_depth_estimation.py @@ -15,9 +15,7 @@ import hashlib import unittest -import torch - -from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_vision_available +from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available, is_vision_available from transformers.pipelines import DepthEstimationPipeline, pipeline from transformers.testing_utils import ( is_pipeline_test, @@ -32,6 +30,9 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta +if is_torch_available(): + import torch + if is_vision_available(): from PIL import Image else: @@ -52,6 +53,7 @@ def hashimage(image: Image) -> str: @require_torch @is_pipeline_test class DepthEstimationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): + model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING def get_test_pipeline(self, model, tokenizer, feature_extractor): From ce2027e7a7eb7c04bc64d6ee225a2b968c0e6ce8 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 11 Oct 2022 12:11:48 +0200 Subject: [PATCH 11/13] This is not used anymore. --- tests/pipelines/test_pipelines_depth_estimation.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/tests/pipelines/test_pipelines_depth_estimation.py b/tests/pipelines/test_pipelines_depth_estimation.py index 123c382b500d..1528640257e8 100644 --- a/tests/pipelines/test_pipelines_depth_estimation.py +++ b/tests/pipelines/test_pipelines_depth_estimation.py @@ -17,15 +17,7 @@ from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available, is_vision_available from transformers.pipelines import DepthEstimationPipeline, pipeline -from transformers.testing_utils import ( - is_pipeline_test, - nested_simplify, - require_tf, - require_timm, - require_torch, - require_vision, - slow, -) +from transformers.testing_utils import nested_simplify, require_tf, require_timm, require_torch, require_vision, slow from .test_pipelines_common import ANY, PipelineTestCaseMeta @@ -51,7 +43,6 @@ def hashimage(image: Image) -> str: @require_vision @require_timm @require_torch -@is_pipeline_test class DepthEstimationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING From 31a08bd3c192a67b246d929a4f72bb0a91f6166b Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 11 Oct 2022 12:25:49 +0200 Subject: [PATCH 12/13] Fixing the test. `Image` is a module `Image.Image` is the type. --- tests/pipelines/test_pipelines_depth_estimation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/pipelines/test_pipelines_depth_estimation.py b/tests/pipelines/test_pipelines_depth_estimation.py index 1528640257e8..d42ba2a067cb 100644 --- a/tests/pipelines/test_pipelines_depth_estimation.py +++ b/tests/pipelines/test_pipelines_depth_estimation.py @@ -56,7 +56,7 @@ def get_test_pipeline(self, model, tokenizer, feature_extractor): def run_pipeline_test(self, depth_estimator, examples): outputs = depth_estimator("./tests/fixtures/tests_samples/COCO/000000039769.png") - self.assertEqual({"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image)}, outputs) + self.assertEqual({"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, outputs) import datasets dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test") @@ -74,11 +74,11 @@ def run_pipeline_test(self, depth_estimator, examples): ) self.assertEqual( [ - {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image)}, - {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image)}, - {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image)}, - {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image)}, - {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, + {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}, ], outputs, ) From cd48ed514cd855c03a5cb4c7ed843e4f2de31885 Mon Sep 17 00:00:00 2001 From: Ritik Nandwal <48522685+nandwalritik@users.noreply.github.com> Date: Wed, 12 Oct 2022 10:21:17 +0530 Subject: [PATCH 13/13] Update docs/source/en/main_classes/pipelines.mdx Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- docs/source/en/main_classes/pipelines.mdx | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/en/main_classes/pipelines.mdx b/docs/source/en/main_classes/pipelines.mdx index bf6604715619..ef6adc481078 100644 --- a/docs/source/en/main_classes/pipelines.mdx +++ b/docs/source/en/main_classes/pipelines.mdx @@ -355,7 +355,6 @@ That should enable you to do all the custom code you want. [[autodoc]] DocumentQuestionAnsweringPipeline - __call__ - all - ### FeatureExtractionPipeline [[autodoc]] FeatureExtractionPipeline