Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/source/transforms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,6 @@ Conversion
v2.PILToTensor
v2.ToImageTensor
ConvertImageDtype
v2.ConvertDtype
v2.ConvertImageDtype
v2.ToDtype
v2.ConvertBoundingBoxFormat
Expand Down
2 changes: 1 addition & 1 deletion gallery/plot_transforms_v2_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def show(sample):
image, target = sample
if isinstance(image, PIL.Image.Image):
image = F.to_image_tensor(image)
image = F.convert_dtype(image, torch.uint8)
image = F.to_dtype(image, torch.uint8, scale=True)
annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3)

fig, ax = plt.subplots()
Expand Down
4 changes: 2 additions & 2 deletions test/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
from torchvision import datapoints, io
from torchvision.transforms._functional_tensor import _max_value as get_max_value
from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_pil, to_image_tensor
from torchvision.transforms.v2.functional import to_dtype_image_tensor, to_image_pil, to_image_tensor


IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"])
Expand Down Expand Up @@ -601,7 +601,7 @@ def fn(shape, dtype, device, memory_format):
image_tensor = image_tensor.to(device=device, memory_format=memory_format, copy=True)
else:
image_tensor = image_tensor.to(device=device)
image_tensor = convert_dtype_image_tensor(image_tensor, dtype=dtype)
image_tensor = to_dtype_image_tensor(image_tensor, dtype=dtype, scale=True)

return datapoints.Image(image_tensor)

Expand Down
4 changes: 2 additions & 2 deletions test/test_transforms_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def normalize_adapter(transform, input, device):
continue
elif check_type(value, (datapoints.Image, datapoints.Video, is_simple_tensor)):
# normalize doesn't support integer images
value = F.convert_dtype(value, torch.float32)
value = F.to_dtype(value, torch.float32, scale=True)
adapted_input[key] = value
return adapted_input

Expand Down Expand Up @@ -146,7 +146,7 @@ class TestSmoke:
(transforms.ScaleJitter((16, 16), scale_range=(0.8, 1.2), antialias=True), None),
(transforms.ClampBoundingBox(), None),
(transforms.ConvertBoundingBoxFormat(datapoints.BoundingBoxFormat.CXCYWH), None),
(transforms.ConvertDtype(), None),
(transforms.ConvertImageDtype(), None),
(transforms.GaussianBlur(kernel_size=3), None),
(
transforms.LinearTransformation(
Expand Down
2 changes: 1 addition & 1 deletion test/test_transforms_v2_consistency.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def __init__(
closeness_kwargs=dict(rtol=None, atol=None),
),
ConsistencyConfig(
v2_transforms.ConvertDtype,
v2_transforms.ConvertImageDtype,
legacy_transforms.ConvertImageDtype,
[
ArgsKwargs(torch.float16),
Expand Down
23 changes: 2 additions & 21 deletions test/test_transforms_v2_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,12 +283,12 @@ def test_float32_vs_uint8(self, test_id, info, args_kwargs):
adapted_other_args, adapted_kwargs = info.float32_vs_uint8(other_args, kwargs)

actual = info.kernel(
F.convert_dtype_image_tensor(input, dtype=torch.float32),
F.to_dtype_image_tensor(input, dtype=torch.float32, scale=True),
*adapted_other_args,
**adapted_kwargs,
)

expected = F.convert_dtype_image_tensor(info.kernel(input, *other_args, **kwargs), dtype=torch.float32)
expected = F.to_dtype_image_tensor(info.kernel(input, *other_args, **kwargs), dtype=torch.float32, scale=True)

assert_close(
actual,
Expand Down Expand Up @@ -538,7 +538,6 @@ def test_bounding_box_format_consistency(self, info, args_kwargs):
(F.get_image_num_channels, F.get_num_channels),
(F.to_pil_image, F.to_image_pil),
(F.elastic_transform, F.elastic),
(F.convert_image_dtype, F.convert_dtype_image_tensor),
(F.to_grayscale, F.rgb_to_grayscale),
]
],
Expand All @@ -547,24 +546,6 @@ def test_alias(alias, target):
assert alias is target


@pytest.mark.parametrize(
("info", "args_kwargs"),
make_info_args_kwargs_params(
KERNEL_INFOS_MAP[F.convert_dtype_image_tensor],
args_kwargs_fn=lambda info: info.sample_inputs_fn(),
),
)
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_convert_dtype_image_tensor_dtype_and_device(info, args_kwargs, device):
(input, *other_args), kwargs = args_kwargs.load(device)
dtype = other_args[0] if other_args else kwargs.get("dtype", torch.float32)

output = info.kernel(input, dtype)

assert output.dtype == dtype
assert output.device == input.device


@pytest.mark.parametrize("device", cpu_and_cuda())
@pytest.mark.parametrize("num_channels", [1, 3])
def test_normalize_image_tensor_stats(device, num_channels):
Expand Down
119 changes: 116 additions & 3 deletions test/test_transforms_v2_refactored.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import contextlib
import decimal
import inspect
import math
import re
Expand Down Expand Up @@ -27,6 +28,7 @@
set_rng_seed,
)
from torch.testing import assert_close
from torch.utils._pytree import tree_map
from torchvision import datapoints

from torchvision.transforms._functional_tensor import _max_value as get_max_value
Expand Down Expand Up @@ -68,7 +70,8 @@ def _script(fn):
try:
return torch.jit.script(fn)
except Exception as error:
raise AssertionError(f"Trying to `torch.jit.script` '{fn.__name__}' raised the error above.") from error
name = getattr(fn, "__name__", fn.__class__.__name__)
raise AssertionError(f"Trying to `torch.jit.script` '{name}' raised the error above.") from error


def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs):
Expand Down Expand Up @@ -125,6 +128,7 @@ def check_kernel(
check_cuda_vs_cpu=True,
check_scripted_vs_eager=True,
check_batched_vs_unbatched=True,
expect_same_dtype=True,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not against it, but are we expecting more kernels to set this to False?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so (more precisely: I don't know). We still want the rest of the checks to be done for ToDtype though. Is there a better way than to add a parameter?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really. Since this will be the only kernel that ever needs this, we could implement a custom check_kernel inside the test class. Basically copy-pasting the current function, but adding the new parameter as well as stripping everything that is more generic but not needed in this specific case. This would keep the check_kernel signature clean, since it already has quite a few parameters.

Kinda torn on this. Up to you. I'm ok with both.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a slight preference for adding the parameter, because otherwise we'd have to change both implementation of check_kernel if we ever needed to update it.
(looks like I'm advocating for this single entry point after all :p )

**kwargs,
):
initial_input_version = input._version
Expand All @@ -137,7 +141,8 @@ def check_kernel(
# check that no inplace operation happened
assert input._version == initial_input_version

assert output.dtype == input.dtype
if expect_same_dtype:
assert output.dtype == input.dtype
assert output.device == input.device

if check_cuda_vs_cpu:
Expand Down Expand Up @@ -274,7 +279,7 @@ def check_dispatcher_signatures_match(dispatcher, *, kernel, input_type):
def _check_transform_v1_compatibility(transform, input):
"""If the transform defines the ``_v1_transform_cls`` attribute, checks if the transform has a public, static
``get_params`` method, is scriptable, and the scripted version can be called without error."""
if not hasattr(transform, "_v1_transform_cls"):
if transform._v1_transform_cls is None:
return

if type(input) is not torch.Tensor:
Expand Down Expand Up @@ -1634,3 +1639,111 @@ def test_transform_negative_degrees_error(self):
def test_transform_unknown_fill_error(self):
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomAffine(degrees=0, fill="fill")


class TestToDtype:
    """Tests for the ``ToDtype`` transform, the ``to_dtype`` dispatcher, and the
    ``to_dtype_image_tensor`` / ``to_dtype_video`` kernels.

    Covers the dtype cross-product {float32, float64, uint8} x {float32, float64, uint8}
    with and without value rescaling (``scale=True/False``), on CPU and CUDA.
    """

    @pytest.mark.parametrize(
        ("kernel", "make_input"),
        [
            (F.to_dtype_image_tensor, make_image_tensor),
            (F.to_dtype_image_tensor, make_image),
            (F.to_dtype_video, make_video),
        ],
    )
    @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("scale", (True, False))
    def test_kernel(self, kernel, make_input, input_dtype, output_dtype, device, scale):
        # expect_same_dtype=False: dtype conversion is the kernel's purpose, so the
        # generic "output dtype equals input dtype" check must be disabled here.
        check_kernel(
            kernel,
            make_input(dtype=input_dtype, device=device),
            expect_same_dtype=False,
            dtype=output_dtype,
            scale=scale,
        )

    @pytest.mark.parametrize(
        ("kernel", "make_input"),
        [
            (F.to_dtype_image_tensor, make_image_tensor),
            (F.to_dtype_image_tensor, make_image),
            (F.to_dtype_video, make_video),
        ],
    )
    @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("scale", (True, False))
    def test_dispatcher(self, kernel, make_input, input_dtype, output_dtype, device, scale):
        # Checks that F.to_dtype routes to the right kernel for each input type.
        check_dispatcher(
            F.to_dtype,
            kernel,
            make_input(dtype=input_dtype, device=device),
            check_dispatch=False,  # TODO: the check would pass if we were to use the non-datapoint dependent logic of _check_dispatcher_dispatch ¯\_(ツ)_/¯
            dtype=output_dtype,
            scale=scale,
        )

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image, make_bounding_box, make_segmentation_mask, make_video],
    )
    @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("scale", (True, False))
    def test_transform(self, make_input, input_dtype, output_dtype, device, scale):
        input = make_input(dtype=input_dtype, device=device)
        # NOTE(review): this rebinding discards the parametrized `output_dtype`, so
        # every parametrization exercises the same `{"others": torch.float32}` mapping
        # — confirm whether `{"others": output_dtype}` was intended here.
        output_dtype = {"others": torch.float32}

        check_transform(transforms.ToDtype, input, dtype=output_dtype, scale=scale)

    def reference_convert_dtype_image_tensor(self, image, dtype=torch.float, scale=False):
        """Slow, elementwise reference implementation of dtype conversion with
        optional value rescaling, used to validate ``F.to_dtype``.

        Uses ``decimal.Decimal`` for exact arithmetic so the reference is free of
        float rounding artifacts that the tested kernel might have.
        """
        input_dtype = image.dtype
        output_dtype = dtype

        # Without scaling, the conversion is a plain cast.
        if not scale:
            return image.to(dtype)

        # Same dtype with scaling is a no-op.
        if output_dtype == input_dtype:
            return image

        def fn(value):
            # Maps a single scalar from the input dtype's value range to the
            # output dtype's value range.
            if input_dtype.is_floating_point:
                if output_dtype.is_floating_point:
                    # float -> float: value ranges coincide ([0, 1]).
                    return value
                else:
                    # float -> int: scale [0, 1] up to [0, int_max].
                    return int(decimal.Decimal(value) * torch.iinfo(output_dtype).max)
            else:
                input_max_value = torch.iinfo(input_dtype).max

                if output_dtype.is_floating_point:
                    # int -> float: scale [0, int_max] down to [0, 1].
                    return float(decimal.Decimal(value) / input_max_value)
                else:
                    # int -> int: scale by the ratio of the (power-of-two sized)
                    # value ranges, hence the +1 on both max values.
                    output_max_value = torch.iinfo(output_dtype).max

                    if input_max_value > output_max_value:
                        factor = (input_max_value + 1) // (output_max_value + 1)
                        return value / factor
                    else:
                        factor = (output_max_value + 1) // (input_max_value + 1)
                        return value * factor

        # tree_map applies fn to every scalar of the nested list representation.
        return torch.tensor(tree_map(fn, image.tolist()), dtype=dtype)

    @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("scale", (True, False))
    def test_against_ref(self, input_dtype, output_dtype, device, scale):
        input = make_image(dtype=input_dtype, device=device)

        out = F.to_dtype(input, dtype=output_dtype, scale=scale)
        expected = self.reference_convert_dtype_image_tensor(input, dtype=output_dtype, scale=scale)

        if output_dtype is torch.uint8 and scale:
            # TODO: is this actually normal? Why wasn't this a problem before?
            # Off-by-one tolerance: the kernel and the exact Decimal reference may
            # round differently when quantizing to uint8.
            torch.testing.assert_close(out, expected, atol=1, rtol=0)
        else:
            torch.testing.assert_close(out, expected)
10 changes: 0 additions & 10 deletions test/transforms_v2_dispatcher_infos.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,16 +364,6 @@ def fill_sequence_needs_broadcast(args_kwargs):
xfail_jit_python_scalar_arg("std"),
],
),
DispatcherInfo(
F.convert_dtype,
kernels={
datapoints.Image: F.convert_dtype_image_tensor,
datapoints.Video: F.convert_dtype_video,
},
test_marks=[
skip_dispatch_datapoint,
],
),
DispatcherInfo(
F.uniform_temporal_subsample,
kernels={
Expand Down
Loading