Fixes incoherence in affine transformation when center is defined as half image size + 0.5 (#2468)

The incoherence occurs when the affine transformation is a 90-degree rotation: the output then contains a line of zeros.
pytorch · Jul 15, 2020 · a568c7f · a568c7f
1 parent 0344603
commit a568c7f
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 10 deletions.
diff --git a/test/test_transforms.py b/test/test_transforms.py
@@ -1311,14 +1311,11 @@ def test_rotate_fill(self):
 
     def test_affine(self):
         input_img = np.zeros((40, 40, 3), dtype=np.uint8)
-        pts = []
         cnt = [20, 20]
         for pt in [(16, 16), (20, 16), (20, 20)]:
             for i in range(-5, 5):
                 for j in range(-5, 5):
                     input_img[pt[0] + i, pt[1] + j, :] = [255, 155, 55]
-                    pts.append((pt[0] + i, pt[1] + j))
-        pts = list(set(pts))
 
         with self.assertRaises(TypeError):
             F.affine(input_img, 10)
@@ -1373,9 +1370,12 @@ def _test_transformation(a, t, s, sh):
             inv_true_matrix = np.linalg.inv(true_matrix)
             for y in range(true_result.shape[0]):
                 for x in range(true_result.shape[1]):
-                    res = np.dot(inv_true_matrix, [x, y, 1])
-                    _x = int(res[0] + 0.5)
-                    _y = int(res[1] + 0.5)
+                    # Same as for PIL:
+                    # https://github.com/python-pillow/Pillow/blob/71f8ec6a0cfc1008076a023c0756542539d057ab/
+                    # src/libImaging/Geometry.c#L1060
+                    input_pt = np.array([x + 0.5, y + 0.5, 1.0])
+                    res = np.floor(np.dot(inv_true_matrix, input_pt)).astype(np.int)
+                    _x, _y = res[:2]
                     if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]:
                         true_result[y, x, :] = input_img[_y, _x, :]
 
@@ -1408,7 +1408,7 @@ def _test_transformation(a, t, s, sh):
         # Test rotation, scale, translation, shear
         for a in range(-90, 90, 25):
             for t1 in range(-10, 10, 5):
-                for s in [0.75, 0.98, 1.0, 1.1, 1.2]:
+                for s in [0.75, 0.98, 1.0, 1.2, 1.4]:
                     for sh in range(-15, 15, 5):
                         _test_transformation(a=a, t=(t1, t1), s=s, sh=(sh, sh))
 

diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
@@ -1,12 +1,11 @@
 import math
 import numbers
 import warnings
-from collections.abc import Iterable
 from typing import Any
 
 import numpy as np
 from numpy import sin, cos, tan
-from PIL import Image, ImageOps, ImageEnhance, __version__ as PILLOW_VERSION
+from PIL import Image, __version__ as PILLOW_VERSION
 
 import torch
 from torch import Tensor
@@ -910,7 +909,10 @@ def affine(img, angle, translate, scale, shear, resample=0, fillcolor=None):
     assert scale > 0.0, "Argument scale should be positive"
 
     output_size = img.size
-    center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5)
+    # center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5)
+    # It is visually better to compute the center without the 0.5 offset;
+    # otherwise an image rotated by 90 degrees is shifted by 1 pixel.
+    center = (img.size[0] * 0.5, img.size[1] * 0.5)
     matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear)
     kwargs = {"fillcolor": fillcolor} if int(PILLOW_VERSION.split('.')[0]) >= 5 else {}
     return img.transform(output_size, Image.AFFINE, matrix, resample, **kwargs)