-
Notifications
You must be signed in to change notification settings - Fork 27.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add depth estimation pipeline (#18618)
* Add initial files for depth estimation pipelines * Add test file for depth estimation pipeline * Update model mapping names * Add updates for depth estimation output * Add generic test * Hopefully fixing the tests. * Check if test passes * Add make fixup and make fix-copies changes after rebase with main * Rebase with main * Fixing up depth pipeline. * This is not used anymore. * Fixing the test. `Image` is a module `Image.Image` is the type. * Update docs/source/en/main_classes/pipelines.mdx Co-authored-by: Sylvain Gugger <[email protected]> Co-authored-by: Nicolas Patry <[email protected]> Co-authored-by: Sylvain Gugger <[email protected]>
- Loading branch information
1 parent
4ed0fa3
commit e94384e
Showing
10 changed files
with
255 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
from typing import List, Union | ||
|
||
import numpy as np | ||
|
||
from ..utils import add_end_docstrings, is_torch_available, is_vision_available, logging, requires_backends | ||
from .base import PIPELINE_INIT_ARGS, Pipeline | ||
|
||
|
||
if is_vision_available(): | ||
from PIL import Image | ||
|
||
from ..image_utils import load_image | ||
|
||
if is_torch_available(): | ||
import torch | ||
|
||
from ..models.auto.modeling_auto import MODEL_FOR_DEPTH_ESTIMATION_MAPPING | ||
|
||
logger = logging.get_logger(__name__) | ||
|
||
|
||
@add_end_docstrings(PIPELINE_INIT_ARGS)
class DepthEstimationPipeline(Pipeline):
    """
    Depth estimation pipeline using any `AutoModelForDepthEstimation`. This pipeline predicts the depth of an image.

    This depth estimation pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"depth-estimation"`.

    See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=depth-estimation).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Images are loaded and returned through PIL, so the vision backend is mandatory.
        requires_backends(self, "vision")
        self.check_model_type(MODEL_FOR_DEPTH_ESTIMATION_MAPPING)

    def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs):
        """
        Predict the depth of the image(s) passed as inputs.

        Args:
            images (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing a http link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

                The pipeline accepts either a single image or a batch of images, which must then be passed as a list.
                Images in a batch must all be in the same format: all as http links, all as local paths, or all as PIL
                images.

        Return:
            A dictionary or a list of dictionaries containing result. If the input is a single image, will return a
            dictionary, if the input is a list of several images, will return a list of dictionaries corresponding to
            the images.

            The dictionaries contain the following keys:

            - **predicted_depth** (`torch.Tensor`) -- The raw depth tensor produced by the model, at the model's own
              output resolution.
            - **depth** (`PIL.Image`) -- The predicted depth resized to the input image size and rescaled to an
              8-bit image whose brightest value corresponds to the largest predicted depth.
        """
        return super().__call__(images, **kwargs)

    def _sanitize_parameters(self, **kwargs):
        """Return three empty parameter dicts: this pipeline takes no call-time options, so `kwargs` are unused."""
        return {}, {}, {}

    def preprocess(self, image):
        """Load `image` and turn it into model inputs, remembering the size to restore in `postprocess`."""
        image = load_image(image)
        model_inputs = self.feature_extractor(images=image, return_tensors=self.framework)
        # Carry the size with the inputs instead of storing it on `self`: instance state is overwritten as soon as
        # another image is preprocessed, which yields wrongly sized outputs when several images are in flight.
        # PIL reports (width, height); interpolation expects (height, width).
        model_inputs["target_size"] = image.size[::-1]
        return model_inputs

    def _forward(self, model_inputs):
        """Run the model and forward the target size alongside its outputs."""
        # `target_size` is bookkeeping for `postprocess`, not a model argument.
        target_size = model_inputs.pop("target_size")
        model_outputs = self.model(**model_inputs)
        model_outputs["target_size"] = target_size
        return model_outputs

    def postprocess(self, model_outputs):
        """
        Resize the predicted depth to the original image size and convert it to a PIL image.

        Returns a dictionary with the raw `predicted_depth` tensor and the 8-bit `depth` image.
        """
        predicted_depth = model_outputs.predicted_depth
        # `interpolate` wants a channel axis, hence the `unsqueeze(1)`.
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1), size=model_outputs["target_size"], mode="bicubic", align_corners=False
        )
        # Drop only the channel and batch axes; a bare `squeeze()` would also drop a height or width of 1.
        output = prediction.squeeze(1).squeeze(0).cpu().numpy()
        max_depth = np.max(output)
        if max_depth > 0:
            formatted = (output * 255 / max_depth).astype("uint8")
        else:
            # Nothing to normalise by (e.g. an all-zero map): avoid dividing by zero and emit a black image.
            formatted = np.zeros(output.shape, dtype="uint8")
        depth = Image.fromarray(formatted)
        return {"predicted_depth": predicted_depth, "depth": depth}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.