From 77ef21580165d04927cbe4d6c35d7bd1f3ad5f9a Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 11 Dec 2024 16:06:29 +0000
Subject: [PATCH] Some docs

---
 docs/source/io.rst      | 11 ++++++-----
 test/test_image.py      |  4 ----
 torchvision/io/image.py | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/docs/source/io.rst b/docs/source/io.rst
index 656e3042f8c..c3f2d658014 100644
--- a/docs/source/io.rst
+++ b/docs/source/io.rst
@@ -9,8 +9,8 @@ images and videos.
 Image Decoding
 --------------
 
-Torchvision currently supports decoding JPEG, PNG, WEBP and GIF images. JPEG
-decoding can also be done on CUDA GPUs.
+Torchvision currently supports decoding JPEG, PNG, WEBP, GIF, AVIF, and HEIC
+images. JPEG decoding can also be done on CUDA GPUs.
 
 The main entry point is the :func:`~torchvision.io.decode_image` function, which
 you can use as an alternative to ``PIL.Image.open()``. It will decode images
@@ -30,9 +30,10 @@ run transforms/preproc natively on tensors.
 
 
 :func:`~torchvision.io.decode_image` will automatically detect the image format,
-and call the corresponding decoder. You can also use the lower-level
-format-specific decoders which can be more powerful, e.g. if you want to
-encode/decode JPEGs on CUDA.
+and call the corresponding decoder (except for HEIC and AVIF images, see details
+in :func:`~torchvision.io.decode_avif` and :func:`~torchvision.io.decode_heic`).
+You can also use the lower-level format-specific decoders which can be more
+powerful, e.g. if you want to encode/decode JPEGs on CUDA.
 
 .. autosummary::
     :toctree: generated/
diff --git a/test/test_image.py b/test/test_image.py
index 043c4a3b64e..b8e96773267 100644
--- a/test/test_image.py
+++ b/test/test_image.py
@@ -925,9 +925,7 @@ def test_decode_webp_against_pil(decode_fun, scripted, mode, pil_mode, filename)
     img += 123  # make sure image buffer wasn't freed by underlying decoding lib
 
 
-# TODO_AVIF_HEIC make decode_image work
 @pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
-# @pytest.mark.parametrize("decode_fun", (decode_avif, decode_image))
 @pytest.mark.parametrize("decode_fun", (decode_avif,))
 def test_decode_avif(decode_fun):
     encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".avif")))
@@ -1016,8 +1014,6 @@ def test_decode_avif_heic_against_pil(decode_fun, mode, pil_mode, filename):
     torch.testing.assert_close(img, from_pil, rtol=0, atol=3)
 
 
-# TODO_AVIF_HEIC make decode_image work
-# @pytest.mark.parametrize("decode_fun", (decode_heic, decode_image))
 @pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
 @pytest.mark.parametrize("decode_fun", (decode_heic,))
 def test_decode_heic(decode_fun):
diff --git a/torchvision/io/image.py b/torchvision/io/image.py
index e6b53e425fe..023898f33c6 100644
--- a/torchvision/io/image.py
+++ b/torchvision/io/image.py
@@ -296,6 +296,12 @@ def decode_image(
     after this function to convert the decoded image into a uint8 or float
     tensor.
 
+    .. note::
+
+        ``decode_image()`` doesn't work yet on AVIF or HEIC images. For these
+        formats, directly call :func:`~torchvision.io.decode_avif` or
+        :func:`~torchvision.io.decode_heic`.
+
     Args:
         input (Tensor or str or ``pathlib.Path``): The image to decode. If a
             tensor is passed, it must be one dimensional uint8 tensor containing
@@ -384,6 +390,17 @@ def decode_webp(
 # The ops (torch.ops.extra_decoders_ns.decode_*) are otherwise torchscript-able,
 # and users who need torchscript can always just wrap those.
 
+# TODO_AVIF_HEIC: decode_image() should work for those. The key technical issue
+# we have here is that the format detection logic of decode_image() is
+# implemented in torchvision, and torchvision has zero knowledge of
+# torchvision-extra-decoders, so we cannot call the AVIF/HEIC C++ decoders
+# (those in torchvision-extra-decoders) from there.
+# A trivial check that could be done within torchvision would be to check the
+# file extension, if a path was passed. We could also just implement the
+# AVIF/HEIC detection logic in Python as a fallback, if the file detection
+# didn't find any format. In any case: properly determining whether a file is
+# HEIC is far from trivial, and relying on libmagic would probably be best.
+
 
 _EXTRA_DECODERS_ALREADY_LOADED = False
 
@@ -423,6 +440,17 @@ def _load_extra_decoders_once():
 def decode_avif(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
     """Decode an AVIF image into a 3 dimensional RGB[A] Tensor.
 
+    .. warning::
+        In order to enable the AVIF decoding capabilities of torchvision, you
+        first need to run ``pip install torchvision-extra-decoders``. Just
+        install the package, you don't need to update your code. This is only
+        supported on Linux, and this feature is still in BETA stage. Please let
+        us know of any issue:
+        https://github.com/pytorch/vision/issues/new/choose. Note that
+        `torchvision-extra-decoders
+        <https://github.com/pytorch-labs/torchvision-extra-decoders/>`_ is
+        released under the LGPL license.
+
     The values of the output tensor are in uint8 in [0, 255] for most images. If
     the image has a bit-depth of more than 8, then the output tensor is uint16
     in [0, 65535]. Since uint16 support is limited in pytorch, we recommend
@@ -449,6 +477,17 @@ def decode_avif(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANG
 def decode_heic(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
     """Decode an HEIC image into a 3 dimensional RGB[A] Tensor.
 
+    .. warning::
+        In order to enable the HEIC decoding capabilities of torchvision, you
+        first need to run ``pip install torchvision-extra-decoders``. Just
+        install the package, you don't need to update your code. This is only
+        supported on Linux, and this feature is still in BETA stage. Please let
+        us know of any issue:
+        https://github.com/pytorch/vision/issues/new/choose. Note that
+        `torchvision-extra-decoders
+        <https://github.com/pytorch-labs/torchvision-extra-decoders/>`_ is
+        released under the LGPL license.
+
     The values of the output tensor are in uint8 in [0, 255] for most images. If
     the image has a bit-depth of more than 8, then the output tensor is uint16
     in [0, 65535]. Since uint16 support is limited in pytorch, we recommend