Fixes F.affine and F.rotate to support rectangular tensor images #2553

Merged: 14 commits, Aug 6, 2020
189 changes: 106 additions & 83 deletions test/test_functional_tensor.py
@@ -385,93 +385,116 @@ def test_resized_crop(self):
)

def test_affine(self):
# Tests on square image
tensor, pil_img = self._create_data(26, 26)

# Tests on square and rectangular images
scripted_affine = torch.jit.script(F.affine)
# 1) identity map
out_tensor = F.affine(tensor, angle=0, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0)
self.assertTrue(
tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5])
)
out_tensor = scripted_affine(tensor, angle=0, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0)
self.assertTrue(
tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5])
)

# 2) Test rotation
test_configs = [
(90, torch.rot90(tensor, k=1, dims=(-1, -2))),
(45, None),
(30, None),
(-30, None),
(-45, None),
(-90, torch.rot90(tensor, k=-1, dims=(-1, -2))),
(180, torch.rot90(tensor, k=2, dims=(-1, -2))),
]
for a, true_tensor in test_configs:
for fn in [F.affine, scripted_affine]:
out_tensor = fn(tensor, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0)
if true_tensor is not None:
self.assertTrue(
true_tensor.equal(out_tensor),
msg="{}\n{} vs \n{}".format(a, out_tensor[0, :5, :5], true_tensor[0, :5, :5])
)
else:
true_tensor = out_tensor

out_pil_img = F.affine(pil_img, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

num_diff_pixels = (true_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / true_tensor.shape[-1] / true_tensor.shape[-2]
# Tolerance: less than 6% of different pixels
self.assertLess(
ratio_diff_pixels,
0.06,
msg="{}\n{} vs \n{}".format(
ratio_diff_pixels, true_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
)
)
# 3) Test translation
test_configs = [
[10, 12], (12, 13)
]
for t in test_configs:
for fn in [F.affine, scripted_affine]:
out_tensor = fn(tensor, angle=0, translate=t, scale=1.0, shear=[0.0, 0.0], resample=0)
out_pil_img = F.affine(pil_img, angle=0, translate=t, scale=1.0, shear=[0.0, 0.0], resample=0)
self.compareTensorToPIL(out_tensor, out_pil_img)

# 4) Test rotation + translation + scale + shear
test_configs = [
(45, [5, 6], 1.0, [0.0, 0.0]),
(33, (5, -4), 1.0, [0.0, 0.0]),
(45, [5, 4], 1.2, [0.0, 0.0]),
(33, (4, 8), 2.0, [0.0, 0.0]),
(85, (10, -10), 0.7, [0.0, 0.0]),
(0, [0, 0], 1.0, [35.0, ]),
(25, [0, 0], 1.2, [0.0, 15.0]),
(45, [10, 0], 0.7, [2.0, 5.0]),
(45, [10, -10], 1.2, [4.0, 5.0]),
]
for r in [0, ]:
for a, t, s, sh in test_configs:
for tensor, pil_img in [self._create_data(26, 26), self._create_data(32, 26)]:

# 1) identity map
out_tensor = F.affine(tensor, angle=0, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0)
self.assertTrue(
tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5])
)
out_tensor = scripted_affine(tensor, angle=0, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0)
self.assertTrue(
tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5])
)

if pil_img.size[0] == pil_img.size[1]:
# 2) Test rotation
test_configs = [
(90, torch.rot90(tensor, k=1, dims=(-1, -2))),
(45, None),
(30, None),
(-30, None),
(-45, None),
(-90, torch.rot90(tensor, k=-1, dims=(-1, -2))),
(180, torch.rot90(tensor, k=2, dims=(-1, -2))),
]
for a, true_tensor in test_configs:
for fn in [F.affine, scripted_affine]:
out_tensor = fn(tensor, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0)
if true_tensor is not None:
self.assertTrue(
true_tensor.equal(out_tensor),
msg="{}\n{} vs \n{}".format(a, out_tensor[0, :5, :5], true_tensor[0, :5, :5])
)
else:
true_tensor = out_tensor

out_pil_img = F.affine(pil_img, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

num_diff_pixels = (true_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / true_tensor.shape[-1] / true_tensor.shape[-2]
# Tolerance: less than 6% of different pixels
self.assertLess(
ratio_diff_pixels,
0.06,
msg="{}\n{} vs \n{}".format(
ratio_diff_pixels, true_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
)
)
else:
test_configs = [
90, 45, 15, -30, -60, -120
]
for a in test_configs:
for fn in [F.affine, scripted_affine]:
out_tensor = fn(tensor, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0)
out_pil_img = F.affine(pil_img, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance: less than 3% of different pixels
self.assertLess(
ratio_diff_pixels,
0.03,
msg="{}: {}\n{} vs \n{}".format(
a, ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
)
)

# 3) Test translation
test_configs = [
[10, 12], (12, 13)
]
for t in test_configs:
for fn in [F.affine, scripted_affine]:
out_tensor = fn(tensor, angle=a, translate=t, scale=s, shear=sh, resample=r)
out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, resample=r)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance: less than 5% of different pixels
self.assertLess(
ratio_diff_pixels,
0.05,
msg="{}: {}\n{} vs \n{}".format(
(r, a, t, s, sh), ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
out_tensor = fn(tensor, angle=0, translate=t, scale=1.0, shear=[0.0, 0.0], resample=0)
out_pil_img = F.affine(pil_img, angle=0, translate=t, scale=1.0, shear=[0.0, 0.0], resample=0)
self.compareTensorToPIL(out_tensor, out_pil_img)

# 4) Test rotation + translation + scale + shear
test_configs = [
(45, [5, 6], 1.0, [0.0, 0.0]),
(33, (5, -4), 1.0, [0.0, 0.0]),
(45, [5, 4], 1.2, [0.0, 0.0]),
(33, (4, 8), 2.0, [0.0, 0.0]),
(85, (10, -10), 0.7, [0.0, 0.0]),
(0, [0, 0], 1.0, [35.0, ]),
(25, [0, 0], 1.2, [0.0, 15.0]),
(45, [10, 0], 0.7, [2.0, 5.0]),
(45, [10, -10], 1.2, [4.0, 5.0]),
]
for r in [0, ]:
for a, t, s, sh in test_configs:
for fn in [F.affine, scripted_affine]:
out_tensor = fn(tensor, angle=a, translate=t, scale=s, shear=sh, resample=r)
out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, resample=r)
out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
# Tolerance: less than 5% of different pixels
self.assertLess(
ratio_diff_pixels,
0.05,
msg="{}: {}\n{} vs \n{}".format(
(r, a, t, s, sh), ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
)
)
)


if __name__ == '__main__':
10 changes: 7 additions & 3 deletions torchvision/transforms/functional.py
@@ -908,10 +908,14 @@ def affine(

return F_pil.affine(img, matrix=matrix, resample=resample, fillcolor=fillcolor)

# we need to rescale translate by image size / 2 as its values can be between -1 and 1
translate = [2.0 * t / s for s, t in zip(img_size, translate)]
if img_size[0] == img_size[1]:
# we need to rescale translate by image size / 2 as its values can be between -1 and 1
translate_f = [2.0 * t / s for s, t in zip(img_size, translate)]
else:
# for a rectangular image, we should not rescale the translation part
translate_f = [1.0 * t for t in translate]

matrix = _get_inverse_affine_matrix([0, 0], angle, translate, scale, shear)
matrix = _get_inverse_affine_matrix([0, 0], angle, translate_f, scale, shear)
Collaborator Author:

Here, I do not like that the matrix's translation part is normalized or not depending on whether the image is square...

Member:

Can't we just make everything follow the same code-path?

Collaborator Author:

The problem is that in the square-image case we use affine_grid, which requires a rescaled translation part, so we compute the matrix here with the rescaled translate_f. In the rectangular case we use the custom affine-grid implementation _gen_affine_grid, where coordinate normalization is applied a posteriori, so we need a matrix whose translation part is not normalized. Normalizing and denormalizing the matrix on the fly is not obvious either. That's why there are two paths.

Member:

Can't we just use _gen_affine_grid everywhere?
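
To make the two conventions concrete, here is a minimal standalone sketch (not part of this diff) checking that a pixel-space grid built the _gen_affine_grid way matches an affine_grid-style normalized-coordinate grid only once the whole matrix, not just the translation column, is conjugated by D = diag(0.5*w, 0.5*h). For square images D is a multiple of the identity, so the conjugation reduces to rescaling the translation, which is exactly what the square-image branch above does:

    import torch

    # a pixel-space affine matrix: 2x2 linear part plus translation in pixels
    theta = torch.tensor([[0.8, -0.2, 5.0],
                          [0.2,  0.8, -3.0]])
    w, h = 32, 26  # rectangular on purpose

    # path A (_gen_affine_grid style): build the grid in pixel coordinates,
    # then normalize the result to [-1, 1]
    x = torch.arange(w) + 0.5 - w * 0.5
    y = torch.arange(h) + 0.5 - h * 0.5
    y, x = torch.meshgrid(y, x)
    pts = torch.stack([x, y, torch.ones_like(x)], dim=-1)
    grid_a = torch.matmul(pts, theta.t()) / torch.tensor([0.5 * w, 0.5 * h])

    # path B (affine_grid style): normalize the base coordinates first; the
    # matrix must then be conjugated by D -- for w != h the linear part
    # changes too, not only the translation column
    D = torch.diag(torch.tensor([0.5 * w, 0.5 * h]))
    theta_n = torch.cat([D.inverse() @ theta[:, :2] @ D,
                         D.inverse() @ theta[:, 2:]], dim=1)
    pts_n = torch.stack([x / (0.5 * w), y / (0.5 * h), torch.ones_like(x)], dim=-1)
    grid_b = torch.matmul(pts_n, theta_n.t())

    print(torch.allclose(grid_a, grid_b, atol=1e-5))  # True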

return F_t.affine(img, matrix=matrix, resample=resample, fillcolor=fillcolor)


23 changes: 22 additions & 1 deletion torchvision/transforms/functional_tensor.py
@@ -619,6 +619,22 @@ def resize(img: Tensor, size: List[int], interpolation: int = 2) -> Tensor:
return img


def _gen_affine_grid(
theta: Tensor, w: int, h: int, ow: int, oh: int,
) -> Tensor:
d = 0.5
x = torch.arange(ow) + d - ow * 0.5
y = torch.arange(oh) + d - oh * 0.5

y, x = torch.meshgrid(y, x)
pts = torch.stack([x, y, torch.ones_like(x)], dim=-1)
output_grid = torch.matmul(pts, theta.t())

output_grid = output_grid / torch.tensor([0.5 * w, 0.5 * h])
Collaborator Author:

Here is the principal difference from an affine_grid-like implementation. In an affine_grid-like implementation it would be

    x = (torch.arange(ow) + d - ow * 0.5) / (0.5 * w)
    y = (torch.arange(oh) + d - oh * 0.5) / (0.5 * h)

instead of scaling output_grid.


return output_grid.unsqueeze(dim=0)
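
As a quick sanity check (a sketch, not part of this diff): for an identity theta with zero translation, _gen_affine_grid reproduces torch.nn.functional.affine_grid with align_corners=False, on rectangular images as well, since an identity linear part does not mix the axes. With a nonzero translation the two differ, because affine_grid expects the translation already normalized while _gen_affine_grid expects pixels:

    import torch
    from torch.nn.functional import affine_grid

    # identity transform, zero translation: both conventions coincide
    theta = torch.tensor([[[1.0, 0.0, 0.0],
                           [0.0, 1.0, 0.0]]])
    h, w = 26, 32
    grid_ref = affine_grid(theta, size=(1, 3, h, w), align_corners=False)
    grid_new = _gen_affine_grid(theta[0], w=w, h=h, ow=w, oh=h)
    print(torch.allclose(grid_ref, grid_new, atol=1e-6))  # True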


def affine(
img: Tensor, matrix: List[float], resample: int = 0, fillcolor: Optional[int] = None
) -> Tensor:
@@ -651,7 +667,12 @@ def affine(

theta = torch.tensor(matrix, dtype=torch.float).reshape(1, 2, 3)
shape = img.shape
grid = affine_grid(theta, size=(1, shape[-3], shape[-2], shape[-1]), align_corners=False)
if shape[-2] == shape[-1]:
# here we need the normalized translation part of theta
grid = affine_grid(theta, size=(1, shape[-3], shape[-2], shape[-1]), align_corners=False)
else:
# here we need the denormalized translation part of theta
grid = _gen_affine_grid(theta[0, :, :], w=shape[-1], h=shape[-2], ow=shape[-1], oh=shape[-2])
Member:

Can you replace everything to use _gen_affine_grid, and make a comment on why affine_grid is not suited for this use-case? I even wonder if we shouldn't open an issue in PyTorch about this

Collaborator Author (vfdev-5, Aug 5, 2020):

I didn't benchmark the two methods to compare performance; I assumed that affine_grid is better optimized than the manual _gen_affine_grid. That's why I chose to split here. Do you mean to wrap everything with _gen_affine_grid and split inside this method?

As for an issue in PyTorch, I think pytorch/pytorch#24870 and pytorch/pytorch#36107 already discuss absolute pixel coords; they are probably related to this problem.

Member:

I mean to just dispatch to _gen_affine_grid and not use nn.functional.affine_grid. The difference in speed should be very small, I think; I'm not sure we dispatch to a cudnn-optimized affine_grid, so it would be basically the same operations, just called from C++.
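
For reference, the single code path being suggested could look like the following hypothetical sketch (not the code in this commit, which still branches): always build the grid with _gen_affine_grid from a pixel-space theta, with functional.affine correspondingly no longer rescaling translate for square images either:

    # hypothetical unified dispatch in F_t.affine (sketch only)
    theta = torch.tensor(matrix, dtype=torch.float).reshape(1, 2, 3)
    shape = img.shape
    grid = _gen_affine_grid(theta[0, :, :], w=shape[-1], h=shape[-2], ow=shape[-1], oh=shape[-2])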


# make image NCHW
need_squeeze = False