Skip to content

Commit

Permalink
Added support for EXIF orientation transform in read_image for JPEG (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
vfdev-5 authored Mar 5, 2024
1 parent bca7a49 commit f3298dc
Show file tree
Hide file tree
Showing 8 changed files with 302 additions and 15 deletions.
40 changes: 39 additions & 1 deletion test/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import torch
import torchvision.transforms.functional as F
from common_utils import assert_equal, needs_cuda
from PIL import __version__ as PILLOW_VERSION, Image
from PIL import __version__ as PILLOW_VERSION, Image, ImageOps
from torchvision.io.image import (
_read_png_16,
decode_image,
Expand Down Expand Up @@ -100,6 +100,44 @@ def test_decode_jpeg(img_path, pil_mode, mode):
assert abs_mean_diff < 2


@pytest.mark.parametrize("orientation", [1, 2, 3, 4, 5, 6, 7, 8, 0])
def test_decode_jpeg_with_exif_orientation(tmpdir, orientation):
fp = os.path.join(tmpdir, f"exif_oriented_{orientation}.jpg")
t = torch.randint(0, 256, size=(3, 256, 257), dtype=torch.uint8)
im = F.to_pil_image(t)
exif = im.getexif()
exif[0x0112] = orientation # set exif orientation
im.save(fp, "JPEG", exif=exif.tobytes())

data = read_file(fp)
output = decode_image(data, apply_exif_orientation=True)

pimg = Image.open(fp)
pimg = ImageOps.exif_transpose(pimg)

expected = F.pil_to_tensor(pimg)
torch.testing.assert_close(expected, output)


@pytest.mark.parametrize("size", [65533, 1, 7, 10, 23, 33])
def test_invalid_exif(tmpdir, size):
# Inspired from a PIL test:
# https://github.com/python-pillow/Pillow/blob/8f63748e50378424628155994efd7e0739a4d1d1/Tests/test_file_jpeg.py#L299
fp = os.path.join(tmpdir, "invalid_exif.jpg")
t = torch.randint(0, 256, size=(3, 256, 257), dtype=torch.uint8)
im = F.to_pil_image(t)
im.save(fp, "JPEG", exif=b"1" * size)

data = read_file(fp)
output = decode_image(data, apply_exif_orientation=True)

pimg = Image.open(fp)
pimg = ImageOps.exif_transpose(pimg)

expected = F.pil_to_tensor(pimg)
torch.testing.assert_close(expected, output)


def test_decode_jpeg_errors():
with pytest.raises(RuntimeError, match="Expected a non empty 1-dimensional tensor"):
decode_jpeg(torch.empty((100, 1), dtype=torch.uint8))
Expand Down
7 changes: 5 additions & 2 deletions torchvision/csrc/io/image/cpu/decode_image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
namespace vision {
namespace image {

torch::Tensor decode_image(const torch::Tensor& data, ImageReadMode mode) {
torch::Tensor decode_image(
const torch::Tensor& data,
ImageReadMode mode,
bool apply_exif_orientation) {
// Check that tensor is a CPU tensor
TORCH_CHECK(data.device() == torch::kCPU, "Expected a CPU tensor");
// Check that the input tensor dtype is uint8
Expand All @@ -22,7 +25,7 @@ torch::Tensor decode_image(const torch::Tensor& data, ImageReadMode mode) {
const uint8_t png_signature[4] = {137, 80, 78, 71}; // == "\211PNG"

if (memcmp(jpeg_signature, datap, 3) == 0) {
return decode_jpeg(data, mode);
return decode_jpeg(data, mode, apply_exif_orientation);
} else if (memcmp(png_signature, datap, 4) == 0) {
return decode_png(data, mode);
} else {
Expand Down
3 changes: 2 additions & 1 deletion torchvision/csrc/io/image/cpu/decode_image.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ namespace image {

C10_EXPORT torch::Tensor decode_image(
const torch::Tensor& data,
ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED);
ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED,
bool apply_exif_orientation = false);

} // namespace image
} // namespace vision
21 changes: 19 additions & 2 deletions torchvision/csrc/io/image/cpu/decode_jpeg.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "decode_jpeg.h"
#include "common_jpeg.h"
#include "exif.h"

namespace vision {
namespace image {
Expand All @@ -12,6 +13,7 @@ torch::Tensor decode_jpeg(const torch::Tensor& data, ImageReadMode mode) {
#else

using namespace detail;
using namespace exif_private;

namespace {

Expand Down Expand Up @@ -65,6 +67,8 @@ static void torch_jpeg_set_source_mgr(
src->len = len;
src->pub.bytes_in_buffer = len;
src->pub.next_input_byte = src->data;

jpeg_save_markers(cinfo, APP1, 0xffff);
}

inline unsigned char clamped_cmyk_rgb_convert(
Expand Down Expand Up @@ -121,7 +125,10 @@ void convert_line_cmyk_to_gray(

} // namespace

torch::Tensor decode_jpeg(const torch::Tensor& data, ImageReadMode mode) {
torch::Tensor decode_jpeg(
const torch::Tensor& data,
ImageReadMode mode,
bool apply_exif_orientation) {
C10_LOG_API_USAGE_ONCE(
"torchvision.csrc.io.image.cpu.decode_jpeg.decode_jpeg");
// Check that the input tensor dtype is uint8
Expand Down Expand Up @@ -191,6 +198,11 @@ torch::Tensor decode_jpeg(const torch::Tensor& data, ImageReadMode mode) {
jpeg_calc_output_dimensions(&cinfo);
}

int exif_orientation = -1;
if (apply_exif_orientation) {
exif_orientation = fetch_exif_orientation(&cinfo);
}

jpeg_start_decompress(&cinfo);

int height = cinfo.output_height;
Expand Down Expand Up @@ -227,7 +239,12 @@ torch::Tensor decode_jpeg(const torch::Tensor& data, ImageReadMode mode) {

jpeg_finish_decompress(&cinfo);
jpeg_destroy_decompress(&cinfo);
return tensor.permute({2, 0, 1});
auto output = tensor.permute({2, 0, 1});

if (apply_exif_orientation) {
return exif_orientation_transform(output, exif_orientation);
}
return output;
}
#endif // #if !JPEG_FOUND

Expand Down
3 changes: 2 additions & 1 deletion torchvision/csrc/io/image/cpu/decode_jpeg.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ namespace image {

C10_EXPORT torch::Tensor decode_jpeg(
const torch::Tensor& data,
ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED);
ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED,
bool apply_exif_orientation = false);

C10_EXPORT int64_t _jpeg_version();
C10_EXPORT bool _is_compiled_against_turbo();
Expand Down
212 changes: 212 additions & 0 deletions torchvision/csrc/io/image/cpu/exif.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this
license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without
modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright
notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote
products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is"
and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are
disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any
direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
// Functions in this module are taken from OpenCV
// https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/imgcodecs/src/exif.cpp

#include <jpeglib.h>
#include <torch/types.h>
#include <vector>

namespace vision {
namespace image {
namespace exif_private {

constexpr uint16_t APP1 = 0xe1;
constexpr uint16_t ENDIANNESS_INTEL = 0x49;
constexpr uint16_t ENDIANNESS_MOTO = 0x4d;
constexpr uint16_t REQ_EXIF_TAG_MARK = 0x2a;
constexpr uint16_t ORIENTATION_EXIF_TAG = 0x0112;
constexpr uint16_t INCORRECT_TAG = -1;

inline uint16_t get_endianness(const std::vector<unsigned char>& exif_data) {
if ((exif_data.size() < 1) ||
(exif_data.size() > 1 && exif_data[0] != exif_data[1])) {
return 0;
}
if (exif_data[0] == 'I') {
return ENDIANNESS_INTEL;
}
if (exif_data[0] == 'M') {
return ENDIANNESS_MOTO;
}
return 0;
}

inline uint16_t get_uint16(
const std::vector<unsigned char>& exif_data,
uint16_t endianness,
const size_t offset) {
if (offset + 1 >= exif_data.size()) {
return INCORRECT_TAG;
}

if (endianness == ENDIANNESS_INTEL) {
return exif_data[offset] + (exif_data[offset + 1] << 8);
}
return (exif_data[offset] << 8) + exif_data[offset + 1];
}

inline uint32_t get_uint32(
const std::vector<unsigned char>& exif_data,
uint16_t endianness,
const size_t offset) {
if (offset + 3 >= exif_data.size()) {
return INCORRECT_TAG;
}

if (endianness == ENDIANNESS_INTEL) {
return exif_data[offset] + (exif_data[offset + 1] << 8) +
(exif_data[offset + 2] << 16) + (exif_data[offset + 3] << 24);
}
return (exif_data[offset] << 24) + (exif_data[offset + 1] << 16) +
(exif_data[offset + 2] << 8) + exif_data[offset + 3];
}

inline int fetch_exif_orientation(j_decompress_ptr cinfo) {
int exif_orientation = -1;
// Check for Exif marker APP1
jpeg_saved_marker_ptr exif_marker = 0;
jpeg_saved_marker_ptr cmarker = cinfo->marker_list;
while (cmarker && exif_marker == 0) {
if (cmarker->marker == APP1) {
exif_marker = cmarker;
}
cmarker = cmarker->next;
}

if (exif_marker) {
// Exif binary structure looks like this
// First 6 bytes: [E, x, i, f, 0, 0]
// Endianness, 2 bytes : [M, M] or [I, I]
// Tag mark, 2 bytes: [0, 0x2a]
// Offset, 4 bytes
// Num entries, 2 bytes
// Tag entries and data, tag has 2 bytes and its data has 10 bytes
// For more details:
// http://www.media.mit.edu/pia/Research/deepview/exif.html

// Bytes from Exif size field to the first TIFF header
constexpr size_t start_offset = 6;
if (exif_marker->data_length > start_offset) {
auto* exif_data_ptr = exif_marker->data + start_offset;
auto size = exif_marker->data_length - start_offset;
// Here we copy the data into the vector structure
// TODO: we can avoid copying the data and read directly from the pointer
std::vector<unsigned char> exif_data_vec(
exif_data_ptr, exif_data_ptr + size);

auto endianness = get_endianness(exif_data_vec);

// Checking whether Tag Mark (0x002A) correspond to one contained in the
// Jpeg file
uint16_t tag_mark = get_uint16(exif_data_vec, endianness, 2);
if (tag_mark == REQ_EXIF_TAG_MARK) {
auto offset = get_uint32(exif_data_vec, endianness, 4);
size_t num_entry = get_uint16(exif_data_vec, endianness, offset);
offset += 2; // go to start of tag fields
constexpr size_t tiff_field_size = 12;
for (size_t entry = 0; entry < num_entry; entry++) {
// Here we just search for orientation tag and parse it
auto tag_num = get_uint16(exif_data_vec, endianness, offset);
if (tag_num == INCORRECT_TAG) {
break;
}
if (tag_num == ORIENTATION_EXIF_TAG) {
exif_orientation =
get_uint16(exif_data_vec, endianness, offset + 8);
break;
}
offset += tiff_field_size;
}
}
}
}
return exif_orientation;
}

constexpr uint16_t IMAGE_ORIENTATION_TL = 1; // normal orientation
constexpr uint16_t IMAGE_ORIENTATION_TR = 2; // needs horizontal flip
constexpr uint16_t IMAGE_ORIENTATION_BR = 3; // needs 180 rotation
constexpr uint16_t IMAGE_ORIENTATION_BL = 4; // needs vertical flip
constexpr uint16_t IMAGE_ORIENTATION_LT =
5; // mirrored horizontal & rotate 270 CW
constexpr uint16_t IMAGE_ORIENTATION_RT = 6; // rotate 90 CW
constexpr uint16_t IMAGE_ORIENTATION_RB =
7; // mirrored horizontal & rotate 90 CW
constexpr uint16_t IMAGE_ORIENTATION_LB = 8; // needs 270 CW rotation

inline torch::Tensor exif_orientation_transform(
const torch::Tensor& image,
int orientation) {
if (orientation == IMAGE_ORIENTATION_TL) {
return image;
} else if (orientation == IMAGE_ORIENTATION_TR) {
return image.flip(-1);
} else if (orientation == IMAGE_ORIENTATION_BR) {
// needs 180 rotation equivalent to
// flip both horizontally and vertically
return image.flip({-2, -1});
} else if (orientation == IMAGE_ORIENTATION_BL) {
return image.flip(-2);
} else if (orientation == IMAGE_ORIENTATION_LT) {
return image.transpose(-1, -2);
} else if (orientation == IMAGE_ORIENTATION_RT) {
return image.transpose(-1, -2).flip(-1);
} else if (orientation == IMAGE_ORIENTATION_RB) {
return image.transpose(-1, -2).flip({-2, -1});
} else if (orientation == IMAGE_ORIENTATION_LB) {
return image.transpose(-1, -2).flip(-2);
}
return image;
}

} // namespace exif_private
} // namespace image
} // namespace vision
6 changes: 4 additions & 2 deletions torchvision/csrc/io/image/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ static auto registry =
torch::RegisterOperators()
.op("image::decode_png", &decode_png)
.op("image::encode_png", &encode_png)
.op("image::decode_jpeg", &decode_jpeg)
.op("image::decode_jpeg(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor",
&decode_jpeg)
.op("image::encode_jpeg", &encode_jpeg)
.op("image::read_file", &read_file)
.op("image::write_file", &write_file)
.op("image::decode_image", &decode_image)
.op("image::decode_image(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor",
&decode_image)
.op("image::decode_jpeg_cuda", &decode_jpeg_cuda)
.op("image::_jpeg_version", &_jpeg_version)
.op("image::_is_compiled_against_turbo", &_is_compiled_against_turbo);
Expand Down
Loading

0 comments on commit f3298dc

Please sign in to comment.