Merge branch 'main' into update-gaussian-blur

vfdev-5 · web-flow · commit b8e4e60fa336 · 2022-10-13T14:29:33.000+02:00
diff --git a/test/prototype_transforms_kernel_infos.py b/test/prototype_transforms_kernel_infos.py
@@ -232,6 +232,7 @@ def reference_inputs_resize_image_tensor():
         make_image_loaders(extra_dims=[()]),
         [
             F.InterpolationMode.NEAREST,
+            F.InterpolationMode.NEAREST_EXACT,
             F.InterpolationMode.BILINEAR,
             F.InterpolationMode.BICUBIC,
         ],
@@ -881,6 +882,7 @@ def reference_inputs_resized_crop_image_tensor():
         make_image_loaders(extra_dims=[()]),
         [
             F.InterpolationMode.NEAREST,
+            F.InterpolationMode.NEAREST_EXACT,
             F.InterpolationMode.BILINEAR,
             F.InterpolationMode.BICUBIC,
         ],
diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py
@@ -25,7 +25,12 @@
 )
 from torchvision.transforms import InterpolationMode
 
-NEAREST, BILINEAR, BICUBIC = InterpolationMode.NEAREST, InterpolationMode.BILINEAR, InterpolationMode.BICUBIC
+NEAREST, NEAREST_EXACT, BILINEAR, BICUBIC = (
+    InterpolationMode.NEAREST,
+    InterpolationMode.NEAREST_EXACT,
+    InterpolationMode.BILINEAR,
+    InterpolationMode.BICUBIC,
+)
 
 
 @pytest.mark.parametrize("device", cpu_and_gpu())
@@ -506,7 +511,7 @@ def test_perspective_interpolation_warning():
     ],
 )
 @pytest.mark.parametrize("max_size", [None, 34, 40, 1000])
-@pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC, NEAREST])
+@pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC, NEAREST, NEAREST_EXACT])
 def test_resize(device, dt, size, max_size, interpolation):
 
     if dt == torch.float16 and device == "cpu":
@@ -966,7 +971,7 @@ def test_pad(device, dt, pad, config):
 
 
 @pytest.mark.parametrize("device", cpu_and_gpu())
-@pytest.mark.parametrize("mode", [NEAREST, BILINEAR, BICUBIC])
+@pytest.mark.parametrize("mode", [NEAREST, NEAREST_EXACT, BILINEAR, BICUBIC])
 def test_resized_crop(device, mode):
     # test values of F.resized_crop in several cases:
     # 1) resize to the same size, crop to the same size => should be identity
diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py
@@ -1789,3 +1789,41 @@ def test__transform(self, mocker):
         mock_resize.assert_called_with(
             inpt_sentinel, size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel
         )
+
+
+@pytest.mark.parametrize(
+    ("dtype", "expected_dtypes"),
+    [
+        (
+            torch.float64,
+            {torch.Tensor: torch.float64, features.Image: torch.float64, features.BoundingBox: torch.float64},
+        ),
+        (
+            {torch.Tensor: torch.int32, features.Image: torch.float32, features.BoundingBox: torch.float64},
+            {torch.Tensor: torch.int32, features.Image: torch.float32, features.BoundingBox: torch.float64},
+        ),
+    ],
+)
+def test_to_dtype(dtype, expected_dtypes):
+    sample = dict(
+        plain_tensor=torch.testing.make_tensor(5, dtype=torch.int64, device="cpu"),
+        image=make_image(dtype=torch.uint8),
+        bounding_box=make_bounding_box(format=features.BoundingBoxFormat.XYXY, dtype=torch.float32),
+        str="str",
+        int=0,
+    )
+
+    transform = transforms.ToDtype(dtype)
+    transformed_sample = transform(sample)
+
+    for key, value in sample.items():
+        value_type = type(value)
+        transformed_value = transformed_sample[key]
+
+        # make sure the transformation retains the type
+        assert isinstance(transformed_value, value_type)
+
+        if isinstance(value, torch.Tensor):
+            assert transformed_value.dtype is expected_dtypes[value_type]
+        else:
+            assert transformed_value is value
diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
@@ -1037,3 +1037,14 @@ def test_to_image_pil(inpt, mode):
     assert isinstance(output, PIL.Image.Image)
 
     assert np.asarray(inpt).sum() == np.asarray(output).sum()
+
+
+def test_equalize_image_tensor_edge_cases():
+    inpt = torch.zeros(3, 200, 200, dtype=torch.uint8)
+    output = F.equalize_image_tensor(inpt)
+    torch.testing.assert_close(inpt, output)
+
+    inpt = torch.zeros(5, 3, 200, 200, dtype=torch.uint8)
+    inpt[..., 100:, 100:] = 1
+    output = F.equalize_image_tensor(inpt)
+    assert output.unique().tolist() == [0, 255]
diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
@@ -20,7 +20,12 @@
 from torchvision.transforms import functional as F, InterpolationMode
 from torchvision.transforms.autoaugment import _apply_op
 
-NEAREST, BILINEAR, BICUBIC = InterpolationMode.NEAREST, InterpolationMode.BILINEAR, InterpolationMode.BICUBIC
+NEAREST, NEAREST_EXACT, BILINEAR, BICUBIC = (
+    InterpolationMode.NEAREST,
+    InterpolationMode.NEAREST_EXACT,
+    InterpolationMode.BILINEAR,
+    InterpolationMode.BICUBIC,
+)
 
 
 def _test_transform_vs_scripted(transform, s_transform, tensor, msg=None):
@@ -378,7 +383,7 @@ def test_resize_int(self, size):
     @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64])
     @pytest.mark.parametrize("size", [[32], [32, 32], (32, 32), [34, 35]])
     @pytest.mark.parametrize("max_size", [None, 35, 1000])
-    @pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC, NEAREST])
+    @pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC, NEAREST, NEAREST_EXACT])
     def test_resize_scripted(self, dt, size, max_size, interpolation, device):
         tensor, _ = _create_data(height=34, width=36, device=device)
         batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device)
@@ -402,12 +407,12 @@ def test_resize_save_load(self, tmpdir):
     @pytest.mark.parametrize("scale", [(0.7, 1.2), [0.7, 1.2]])
     @pytest.mark.parametrize("ratio", [(0.75, 1.333), [0.75, 1.333]])
     @pytest.mark.parametrize("size", [(32,), [44], [32], [32, 32], (32, 32), [44, 55]])
-    @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR, BICUBIC])
+    @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR, BICUBIC, NEAREST_EXACT])
     @pytest.mark.parametrize("antialias", [None, True, False])
     def test_resized_crop(self, scale, ratio, size, interpolation, antialias, device):
 
-        if antialias and interpolation == NEAREST:
-            pytest.skip("Can not resize if interpolation mode is NEAREST and antialias=True")
+        if antialias and interpolation in {NEAREST, NEAREST_EXACT}:
+            pytest.skip(f"Can not resize if interpolation mode is {interpolation} and antialias=True")
 
         tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device)
         batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device)
diff --git a/torchvision/prototype/transforms/_misc.py b/torchvision/prototype/transforms/_misc.py
@@ -1,4 +1,5 @@
 import functools
+from collections import defaultdict
 from typing import Any, Callable, Dict, Sequence, Type, Union
 
 import PIL.Image
@@ -144,14 +145,22 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
         return F.gaussian_blur(inpt, self.kernel_size, **params)
 
 
-# TODO: Enhance as described at https://github.com/pytorch/vision/issues/6697
-class ToDtype(Lambda):
-    def __init__(self, dtype: torch.dtype, *types: Type) -> None:
+class ToDtype(Transform):
+    _transformed_types = (torch.Tensor,)
+
+    def _default_dtype(self, dtype: torch.dtype) -> torch.dtype:
+        return dtype
+
+    def __init__(self, dtype: Union[torch.dtype, Dict[Type, torch.dtype]]) -> None:
+        super().__init__()
+        if not isinstance(dtype, dict):
+            # This weird-looking construct exists only because lambdas cannot be serialized by pickle.
+            # If they could be, we could replace this with `defaultdict(lambda: dtype)`.
+            dtype = defaultdict(functools.partial(self._default_dtype, dtype))
         self.dtype = dtype
-        super().__init__(functools.partial(torch.Tensor.to, dtype=dtype), *types or (torch.Tensor,))
 
-    def extra_repr(self) -> str:
-        return ", ".join([f"dtype={self.dtype}", f"types={[type.__name__ for type in self.types]}"])
+    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
+        return inpt.to(self.dtype[type(inpt)])
 
 
 class RemoveSmallBoundingBoxes(Transform):
diff --git a/torchvision/prototype/transforms/functional/_color.py b/torchvision/prototype/transforms/functional/_color.py
@@ -183,28 +183,37 @@ def autocontrast(inpt: features.InputTypeJIT) -> features.InputTypeJIT:
         return autocontrast_image_pil(inpt)
 
 
-def _scale_channel(img_chan: torch.Tensor) -> torch.Tensor:
-    # TODO: we should expect bincount to always be faster than histc, but this
-    # isn't always the case. Once
-    # https://github.com/pytorch/pytorch/issues/53194 is fixed, remove the if
-    # block and only use bincount.
-    if img_chan.is_cuda:
-        hist = torch.histc(img_chan.to(torch.float32), bins=256, min=0, max=255)
-    else:
-        hist = torch.bincount(img_chan.view(-1), minlength=256)
-
-    nonzero_hist = hist[hist != 0]
-    step = torch.div(nonzero_hist[:-1].sum(), 255, rounding_mode="floor")
-    if step == 0:
-        return img_chan
-
-    lut = torch.div(torch.cumsum(hist, 0) + torch.div(step, 2, rounding_mode="floor"), step, rounding_mode="floor")
-    # Doing inplace clamp and converting lut to uint8 improves perfs
-    lut.clamp_(0, 255)
-    lut = lut.to(torch.uint8)
-    lut = torch.nn.functional.pad(lut[:-1], [1, 0])
-
-    return lut[img_chan.to(torch.int64)]
+def _equalize_image_tensor_vec(img: torch.Tensor) -> torch.Tensor:
+    # input img shape should be [N, H, W]
+    shape = img.shape
+    # Compute image histogram:
+    flat_img = img.flatten(start_dim=1).to(torch.long)  # -> [N, H * W]
+    hist = flat_img.new_zeros(shape[0], 256)
+    hist.scatter_add_(dim=1, index=flat_img, src=flat_img.new_ones(1).expand_as(flat_img))
+
+    # Compute image cdf
+    chist = hist.cumsum_(dim=1)
+    # Compute steps, where step per channel is nonzero_hist[:-1].sum() // 255
+    # Trick: nonzero_hist[:-1].sum() == chist[idx - 1], where idx = chist.argmax()
+    idx = chist.argmax(dim=1).sub_(1)
+    # If histogram is degenerate (hist of zero image), index is -1
+    neg_idx_mask = idx < 0
+    idx.clamp_(min=0)
+    step = chist.gather(dim=1, index=idx.unsqueeze(1))
+    step[neg_idx_mask] = 0
+    step.div_(255, rounding_mode="floor")
+
+    # Compute batched Look-up-table:
+    # Clamping the step to at least 1 is necessary to avoid an integer division by zero, which raises an error
+    clamped_step = step.clamp(min=1)
+    chist.add_(torch.div(step, 2, rounding_mode="floor")).div_(clamped_step, rounding_mode="floor").clamp_(0, 255)
+    lut = chist.to(torch.uint8)  # [N, 256]
+
+    # Shift the lut right by one entry: drop its last column and prepend a column of zeros
+    zeros = lut.new_zeros((1, 1)).expand(shape[0], 1)
+    lut = torch.cat([zeros, lut[:, :-1]], dim=1)
+
+    return torch.where((step == 0).unsqueeze(-1), img, lut.gather(dim=1, index=flat_img).view_as(img))
 
 
 def equalize_image_tensor(image: torch.Tensor) -> torch.Tensor:
@@ -217,10 +226,8 @@ def equalize_image_tensor(image: torch.Tensor) -> torch.Tensor:
 
     if image.numel() == 0:
         return image
-    elif image.ndim == 2:
-        return _scale_channel(image)
-    else:
-        return torch.stack([_scale_channel(x) for x in image.view(-1, height, width)]).view(image.shape)
+
+    return _equalize_image_tensor_vec(image.view(-1, height, width)).view(image.shape)
 
 
 equalize_image_pil = _FP.equalize
diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
@@ -20,10 +20,12 @@
 
 class InterpolationMode(Enum):
     """Interpolation modes
-    Available interpolation methods are ``nearest``, ``bilinear``, ``bicubic``, ``box``, ``hamming``, and ``lanczos``.
+    Available interpolation methods are ``nearest``, ``nearest-exact``, ``bilinear``, ``bicubic``, ``box``, ``hamming``,
+    and ``lanczos``.
     """
 
     NEAREST = "nearest"
+    NEAREST_EXACT = "nearest-exact"
     BILINEAR = "bilinear"
     BICUBIC = "bicubic"
     # For PIL compatibility
@@ -50,6 +52,7 @@ def _interpolation_modes_from_int(i: int) -> InterpolationMode:
     InterpolationMode.NEAREST: 0,
     InterpolationMode.BILINEAR: 2,
     InterpolationMode.BICUBIC: 3,
+    InterpolationMode.NEAREST_EXACT: 0,
     InterpolationMode.BOX: 4,
     InterpolationMode.HAMMING: 5,
     InterpolationMode.LANCZOS: 1,
@@ -416,7 +419,8 @@ def resize(
         interpolation (InterpolationMode): Desired interpolation enum defined by
             :class:`torchvision.transforms.InterpolationMode`.
             Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``,
-            ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
+            ``InterpolationMode.NEAREST_EXACT``, ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are
+            supported.
             For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted,
             but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum.
         max_size (int, optional): The maximum allowed for the longer edge of
@@ -617,7 +621,8 @@ def resized_crop(
         interpolation (InterpolationMode): Desired interpolation enum defined by
             :class:`torchvision.transforms.InterpolationMode`.
             Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``,
-            ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
+            ``InterpolationMode.NEAREST_EXACT``, ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are
+            supported.
             For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted,
             but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum.
         antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
@@ -296,8 +296,8 @@ class Resize(torch.nn.Module):
                 In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
         interpolation (InterpolationMode): Desired interpolation enum defined by
             :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
-            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
-            ``InterpolationMode.BICUBIC`` are supported.
+            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.NEAREST_EXACT``,
+            ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
             For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted,
             but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum.
         max_size (int, optional): The maximum allowed for the longer edge of
@@ -865,8 +865,8 @@ class RandomResizedCrop(torch.nn.Module):
             resizing.
         interpolation (InterpolationMode): Desired interpolation enum defined by
             :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
-            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
-            ``InterpolationMode.BICUBIC`` are supported.
+            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.NEAREST_EXACT``,
+            ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
             For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted,
             but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum.
         antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias
@@ -2133,9 +2133,9 @@ def forward(self, tensor: Tensor) -> Tensor:
         return F.elastic_transform(tensor, displacement, self.interpolation, self.fill)
 
     def __repr__(self):
-        format_string = self.__class__.__name__ + "(alpha="
-        format_string += str(self.alpha) + ")"
-        format_string += ", (sigma=" + str(self.sigma) + ")"
-        format_string += ", interpolation={self.interpolation}"
-        format_string += ", fill={self.fill})"
+        format_string = self.__class__.__name__
+        format_string += f"(alpha={self.alpha}"
+        format_string += f", sigma={self.sigma}"
+        format_string += f", interpolation={self.interpolation}"
+        format_string += f", fill={self.fill})"
         return format_string