From 77ef21580165d04927cbe4d6c35d7bd1f3ad5f9a Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 11 Dec 2024 16:06:29 +0000 Subject: [PATCH] Some docs --- docs/source/io.rst | 11 ++++++----- test/test_image.py | 4 ---- torchvision/io/image.py | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/docs/source/io.rst b/docs/source/io.rst index 656e3042f8c..c3f2d658014 100644 --- a/docs/source/io.rst +++ b/docs/source/io.rst @@ -9,8 +9,8 @@ images and videos. Image Decoding -------------- -Torchvision currently supports decoding JPEG, PNG, WEBP and GIF images. JPEG -decoding can also be done on CUDA GPUs. +Torchvision currently supports decoding JPEG, PNG, WEBP, GIF, AVIF, and HEIC +images. JPEG decoding can also be done on CUDA GPUs. The main entry point is the :func:`~torchvision.io.decode_image` function, which you can use as an alternative to ``PIL.Image.open()``. It will decode images @@ -30,9 +30,10 @@ run transforms/preproc natively on tensors. :func:`~torchvision.io.decode_image` will automatically detect the image format, -and call the corresponding decoder. You can also use the lower-level -format-specific decoders which can be more powerful, e.g. if you want to -encode/decode JPEGs on CUDA. +and call the corresponding decoder (except for HEIC and AVIF images, see details +in :func:`~torchvision.io.decode_avif` and :func:`~torchvision.io.decode_heic`). +You can also use the lower-level format-specific decoders which can be more +powerful, e.g. if you want to encode/decode JPEGs on CUDA. .. 
autosummary:: :toctree: generated/ diff --git a/test/test_image.py b/test/test_image.py index 043c4a3b64e..b8e96773267 100644 --- a/test/test_image.py +++ b/test/test_image.py @@ -925,9 +925,7 @@ def test_decode_webp_against_pil(decode_fun, scripted, mode, pil_mode, filename) img += 123 # make sure image buffer wasn't freed by underlying decoding lib -# TODO_AVIF_HEIC make decode_image work @pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE) -# @pytest.mark.parametrize("decode_fun", (decode_avif, decode_image)) @pytest.mark.parametrize("decode_fun", (decode_avif,)) def test_decode_avif(decode_fun): encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".avif"))) @@ -1016,8 +1014,6 @@ def test_decode_avif_heic_against_pil(decode_fun, mode, pil_mode, filename): torch.testing.assert_close(img, from_pil, rtol=0, atol=3) -# TODO_AVIF_HEIC make decode_image work -# @pytest.mark.parametrize("decode_fun", (decode_heic, decode_image)) @pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE) @pytest.mark.parametrize("decode_fun", (decode_heic,)) def test_decode_heic(decode_fun): diff --git a/torchvision/io/image.py b/torchvision/io/image.py index e6b53e425fe..023898f33c6 100644 --- a/torchvision/io/image.py +++ b/torchvision/io/image.py @@ -296,6 +296,12 @@ def decode_image( after this function to convert the decoded image into a uint8 or float tensor. + .. note:: + + ``decode_image()`` doesn't work yet on AVIF or HEIC images. For these + formats, directly call :func:`~torchvision.io.decode_avif` or + :func:`~torchvision.io.decode_heic`. + Args: input (Tensor or str or ``pathlib.Path``): The image to decode. If a tensor is passed, it must be one dimensional uint8 tensor containing @@ -384,6 +390,17 @@ def decode_webp( # The ops (torch.ops.extra_decoders_ns.decode_*) are otherwise torchscript-able, # and users who need torchscript can always just wrap those. +# TODO_AVIF_HEIC: decode_image() should work for those. 
The key technical issue +# we have here is that the format detection logic of decode_image() is +# implemented in torchvision, and torchvision has zero knowledge of +# torchvision-extra-decoders, so we cannot call the AVIF/HEIC C++ decoders +# (those in torchvision-extra-decoders) from there. +# A trivial check that could be done within torchvision would be to check the +# file extension, if a path was passed. We could also just implement the +# AVIF/HEIC detection logic in Python as a fallback, if the file detection +# didn't find any format. In any case: properly determining whether a file is +# HEIC is far from trivial, and relying on libmagic would probably be best. + _EXTRA_DECODERS_ALREADY_LOADED = False @@ -423,6 +440,17 @@ def _load_extra_decoders_once(): def decode_avif(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor: """Decode an AVIF image into a 3 dimensional RGB[A] Tensor. + .. warning:: + In order to enable the AVIF decoding capabilities of torchvision, you + first need to run ``pip install torchvision-extra-decoders``. Just + install the package, you don't need to update your code. This is only + supported on Linux, and this feature is still in BETA stage. Please let + us know of any issue: + https://github.com/pytorch/vision/issues/new/choose. Note that + `torchvision-extra-decoders + <https://github.com/pytorch-labs/torchvision-extra-decoders/>`_ is + released under the LGPL license. + The values of the output tensor are in uint8 in [0, 255] for most images. If the image has a bit-depth of more than 8, then the output tensor is uint16 in [0, 65535]. Since uint16 support is limited in pytorch, we recommend @@ -449,6 +477,17 @@ def decode_avif(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANG def decode_heic(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor: """Decode an HEIC image into a 3 dimensional RGB[A] Tensor. + .. 
warning:: + In order to enable the HEIC decoding capabilities of torchvision, you + first need to run ``pip install torchvision-extra-decoders``. Just + install the package, you don't need to update your code. This is only + supported on Linux, and this feature is still in BETA stage. Please let + us know of any issue: + https://github.com/pytorch/vision/issues/new/choose. Note that + `torchvision-extra-decoders + <https://github.com/pytorch-labs/torchvision-extra-decoders/>`_ is + released under the LGPL license. + The values of the output tensor are in uint8 in [0, 255] for most images. If the image has a bit-depth of more than 8, then the output tensor is uint16 in [0, 65535]. Since uint16 support is limited in pytorch, we recommend