diff --git a/docs/source/io.rst b/docs/source/io.rst
index 656e3042f8c..c3f2d658014 100644
--- a/docs/source/io.rst
+++ b/docs/source/io.rst
@@ -9,8 +9,8 @@ images and videos.
Image Decoding
--------------
-Torchvision currently supports decoding JPEG, PNG, WEBP and GIF images. JPEG
-decoding can also be done on CUDA GPUs.
+Torchvision currently supports decoding JPEG, PNG, WEBP, GIF, AVIF, and HEIC
+images. JPEG decoding can also be done on CUDA GPUs.
The main entry point is the :func:`~torchvision.io.decode_image` function, which
you can use as an alternative to ``PIL.Image.open()``. It will decode images
@@ -30,9 +30,10 @@ run transforms/preproc natively on tensors.
:func:`~torchvision.io.decode_image` will automatically detect the image format,
-and call the corresponding decoder. You can also use the lower-level
-format-specific decoders which can be more powerful, e.g. if you want to
-encode/decode JPEGs on CUDA.
+and call the corresponding decoder (except for HEIC and AVIF images, see details
+in :func:`~torchvision.io.decode_avif` and :func:`~torchvision.io.decode_heic`).
+You can also use the lower-level format-specific decoders which can be more
+powerful, e.g. if you want to encode/decode JPEGs on CUDA.
.. autosummary::
:toctree: generated/
diff --git a/test/test_image.py b/test/test_image.py
index 043c4a3b64e..b8e96773267 100644
--- a/test/test_image.py
+++ b/test/test_image.py
@@ -925,9 +925,7 @@ def test_decode_webp_against_pil(decode_fun, scripted, mode, pil_mode, filename)
img += 123 # make sure image buffer wasn't freed by underlying decoding lib
-# TODO_AVIF_HEIC make decode_image work
@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
-# @pytest.mark.parametrize("decode_fun", (decode_avif, decode_image))
@pytest.mark.parametrize("decode_fun", (decode_avif,))
def test_decode_avif(decode_fun):
encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".avif")))
@@ -1016,8 +1014,6 @@ def test_decode_avif_heic_against_pil(decode_fun, mode, pil_mode, filename):
torch.testing.assert_close(img, from_pil, rtol=0, atol=3)
-# TODO_AVIF_HEIC make decode_image work
-# @pytest.mark.parametrize("decode_fun", (decode_heic, decode_image))
@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
@pytest.mark.parametrize("decode_fun", (decode_heic,))
def test_decode_heic(decode_fun):
diff --git a/torchvision/io/image.py b/torchvision/io/image.py
index e6b53e425fe..023898f33c6 100644
--- a/torchvision/io/image.py
+++ b/torchvision/io/image.py
@@ -296,6 +296,12 @@ def decode_image(
after this function to convert the decoded image into a uint8 or float
tensor.
+ .. note::
+
+ ``decode_image()`` doesn't work yet on AVIF or HEIC images. For these
+ formats, directly call :func:`~torchvision.io.decode_avif` or
+ :func:`~torchvision.io.decode_heic`.
+
Args:
input (Tensor or str or ``pathlib.Path``): The image to decode. If a
tensor is passed, it must be one dimensional uint8 tensor containing
@@ -384,6 +390,17 @@ def decode_webp(
# The ops (torch.ops.extra_decoders_ns.decode_*) are otherwise torchscript-able,
# and users who need torchscript can always just wrap those.
+# TODO_AVIF_HEIC: decode_image() should work for those. The key technical issue
+# we have here is that the format detection logic of decode_image() is
+# implemented in torchvision, and torchvision has zero knowledge of
+# torchvision-extra-decoders, so we cannot call the AVIF/HEIC C++ decoders
+# (those in torchvision-extra-decoders) from there.
+# A trivial check that could be done within torchvision would be to check the
+# file extension, if a path was passed. We could also just implement the
+# AVIF/HEIC detection logic in Python as a fallback, if the file detection
+# didn't find any format. In any case: properly determining whether a file is
+# HEIC is far from trivial, and relying on libmagic would probably be best.
+
_EXTRA_DECODERS_ALREADY_LOADED = False
@@ -423,6 +440,17 @@ def _load_extra_decoders_once():
def decode_avif(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
"""Decode an AVIF image into a 3 dimensional RGB[A] Tensor.
+ .. warning::
+ In order to enable the AVIF decoding capabilities of torchvision, you
+ first need to run ``pip install torchvision-extra-decoders``. Just
+ install the package, you don't need to update your code. This is only
+ supported on Linux, and this feature is still in BETA stage. Please let
+ us know of any issue:
+ https://github.com/pytorch/vision/issues/new/choose. Note that
+ `torchvision-extra-decoders
+ <https://github.com/pytorch-labs/torchvision-extra-decoders/>`_ is
+ released under the LGPL license.
+
The values of the output tensor are in uint8 in [0, 255] for most images. If
the image has a bit-depth of more than 8, then the output tensor is uint16
in [0, 65535]. Since uint16 support is limited in pytorch, we recommend
@@ -449,6 +477,17 @@ def decode_avif(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANG
def decode_heic(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
"""Decode an HEIC image into a 3 dimensional RGB[A] Tensor.
+ .. warning::
+ In order to enable the HEIC decoding capabilities of torchvision, you
+ first need to run ``pip install torchvision-extra-decoders``. Just
+ install the package, you don't need to update your code. This is only
+ supported on Linux, and this feature is still in BETA stage. Please let
+ us know of any issue:
+ https://github.com/pytorch/vision/issues/new/choose. Note that
+ `torchvision-extra-decoders
+ <https://github.com/pytorch-labs/torchvision-extra-decoders/>`_ is
+ released under the LGPL license.
+
The values of the output tensor are in uint8 in [0, 255] for most images. If
the image has a bit-depth of more than 8, then the output tensor is uint16
in [0, 65535]. Since uint16 support is limited in pytorch, we recommend