Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions test/prototype_transforms_kernel_infos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1947,3 +1947,41 @@ def sample_inputs_normalize_video():
),
]
)


def sample_inputs_convert_image_dtype():
    """Yield ``ArgsKwargs`` samples covering every safe input/output dtype combination."""
    candidate_dtypes = [torch.uint8, torch.int64, torch.float32, torch.float64]
    for src_dtype, dst_dtype in itertools.product(candidate_dtypes, repeat=2):
        # A floating point image cannot be converted to int64 safely, so skip that pair.
        if src_dtype.is_floating_point and dst_dtype == torch.int64:
            continue

        loaders = make_image_loaders(
            sizes=["random"], color_spaces=[features.ColorSpace.RGB], dtypes=[src_dtype]
        )
        for loader in loaders:
            yield ArgsKwargs(loader, dtype=dst_dtype)

    # One extra sample that exercises the default-loader path with an explicit uint8 target.
    yield ArgsKwargs(make_image_loader(color_space=features.ColorSpace.RGB), dtype=torch.uint8)


# Register `F.convert_image_dtype` so it participates in the generic kernel test suite.
KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.convert_image_dtype,
            sample_inputs_fn=sample_inputs_convert_image_dtype,
            test_marks=[
                # JIT profiling emits a `UserWarning` mentioning `operator() profile_node %36`;
                # ignore it so the scripted-vs-eager comparison does not fail on the warning.
                TestMark(
                    ("TestKernels", "test_scripted_vs_eager"),
                    pytest.mark.filterwarnings(f"ignore:{re.escape('operator() profile_node %36')}:UserWarning"),
                ),
                # Changing the dtype is this kernel's purpose, so skip the generic dtype
                # consistency check whenever the sample actually converts to a different dtype.
                # NOTE(review): the `torch.float32` fallback presumably mirrors the kernel's
                # default `dtype` argument — confirm against the kernel signature.
                TestMark(
                    ("TestKernels", "test_dtype_and_device_consistency"),
                    pytest.mark.skip(reason="`convert_dtype_*` kernels convert the dtype by design"),
                    condition=lambda args_kwargs: args_kwargs.args[0].dtype
                    != args_kwargs.kwargs.get("dtype", torch.float32),
                ),
            ],
        ),
    ]
)
46 changes: 33 additions & 13 deletions test/test_prototype_transforms_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,20 @@ def script(fn):
raise AssertionError(f"Trying to `torch.jit.script` '{fn.__name__}' raised the error above.") from error


def make_info_args_kwargs_params(info, *, args_kwargs_fn, test_id=None):
    """Build one ``pytest.param`` per sample produced by ``args_kwargs_fn(info)``.

    Marks are attached only when ``test_id`` is given; each param id combines the
    info id with a zero-padded running index so ids sort lexicographically.
    """
    samples = list(args_kwargs_fn(info))
    # Width of the zero padding, derived from the total sample count.
    pad = len(str(len(samples)))
    params = []
    for position, sample in enumerate(samples):
        marks = info.get_marks(test_id, sample) if test_id else []
        params.append(
            pytest.param(info, sample, marks=marks, id=f"{info.id}-{position:0{pad}}")
        )
    return params


def make_info_args_kwargs_parametrization(infos, *, args_kwargs_fn, condition=None):
if condition is None:

Expand All @@ -49,18 +63,7 @@ def decorator(test_fn):
if not condition(info):
continue

args_kwargs = list(args_kwargs_fn(info))
idx_field_len = len(str(len(args_kwargs)))

for idx, args_kwargs_ in enumerate(args_kwargs):
argvalues.append(
pytest.param(
info,
args_kwargs_,
marks=info.get_marks(test_id, args_kwargs_),
id=f"{info.id}-{idx:0{idx_field_len}}",
)
)
argvalues.extend(make_info_args_kwargs_params(info, args_kwargs_fn=args_kwargs_fn, test_id=test_id))

return pytest.mark.parametrize(argnames, argvalues)(test_fn)

Expand Down Expand Up @@ -232,7 +235,6 @@ def test_scripted_smoke(self, info, args_kwargs, device):
[
F.clamp_bounding_box,
F.convert_color_space,
F.convert_image_dtype,
F.get_dimensions,
F.get_image_num_channels,
F.get_image_size,
Expand Down Expand Up @@ -312,6 +314,24 @@ def test_alias(alias, target):
assert alias is target


@pytest.mark.parametrize(
("info", "args_kwargs"),
make_info_args_kwargs_params(
next(info for info in KERNEL_INFOS if info.kernel is F.convert_image_dtype),
args_kwargs_fn=lambda info: info.sample_inputs_fn(),
),
)
Comment on lines +317 to +323
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is rather convoluted to get the sample inputs for a single kernel. I'll refactor later since this is low priority right now.

@pytest.mark.parametrize("device", cpu_and_gpu())
def test_dtype_and_device_convert_image_dtype(info, args_kwargs, device):
    """Check the kernel output has the requested dtype and stays on the input's device."""
    (image, *positional), keyword = args_kwargs.load(device)
    # The target dtype may be passed positionally or as a keyword; the fallback
    # mirrors the kernel's own default.
    if positional:
        target_dtype = positional[0]
    else:
        target_dtype = keyword.get("dtype", torch.float32)

    result = info.kernel(image, target_dtype)

    assert result.dtype == target_dtype
    assert result.device == image.device


# TODO: All correctness checks below this line should be ported to be references on a `KernelInfo` in
# `prototype_transforms_kernel_infos.py`

Expand Down
64 changes: 62 additions & 2 deletions torchvision/prototype/transforms/functional/_type_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from torchvision.io.video import read_video
from torchvision.prototype import features
from torchvision.prototype.utils._internal import ReadOnlyTensorBuffer
from torchvision.transforms import functional as _F
from torchvision.transforms import functional as _F, functional_tensor as _FT


@torch.jit.unused
Expand Down Expand Up @@ -42,4 +42,64 @@ def to_image_tensor(image: Union[torch.Tensor, PIL.Image.Image, np.ndarray]) ->
# prevalent and well understood. Thus, we just alias it without deprecating the old name.
to_pil_image = to_image_pil

convert_image_dtype = _F.convert_image_dtype

def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor:
    """Convert ``image`` to ``dtype``, rescaling its values to the target dtype's range.

    Floating point images are treated as having values in ``[0.0, 1.0]``; integer
    images as using the full value range of their dtype. The conversion preserves
    that convention in the output.

    Args:
        image: Tensor image to convert.
        dtype: Target dtype. Defaults to ``torch.float``.

    Returns:
        The converted image. The input is returned unchanged if it already has ``dtype``.

    Raises:
        TypeError: If ``image`` is not a tensor.
        RuntimeError: If the conversion cannot be performed safely
            (``float32 -> int32/int64`` or ``float64 -> int64``, where the integer
            range exceeds the float's integer precision).
    """
    if not isinstance(image, torch.Tensor):
        raise TypeError("Input img should be Tensor Image")

    # No work needed if the image already has the requested dtype.
    if image.dtype == dtype:
        return image

    float_input = image.is_floating_point()
    if torch.jit.is_scripting():
        # TODO: remove this branch as soon as `dtype.is_floating_point` is supported by JIT
        float_output = torch.tensor(0, dtype=dtype).is_floating_point()
    else:
        float_output = dtype.is_floating_point

    if float_input:
        # float to float
        if float_output:
            return image.to(dtype)

        # float to int
        if (image.dtype == torch.float32 and dtype in (torch.int32, torch.int64)) or (
            image.dtype == torch.float64 and dtype == torch.int64
        ):
            raise RuntimeError(f"The conversion from {image.dtype} to {dtype} cannot be performed safely.")

        # For data in the range `[0.0, 1.0]`, just multiplying by the maximum value of the integer range and converting
        # to the integer dtype is not sufficient. For example, `torch.rand(...).mul(255).to(torch.uint8)` will only
        # be `255` if the input is exactly `1.0`. See https://github.com/pytorch/vision/pull/2078#issuecomment-612045321
        # for a detailed analysis.
        # To mitigate this, we could round before we convert to the integer dtype, but this is an extra operation.
        # Instead, we can also multiply by the maximum value plus something close to `1`. See
        # https://github.com/pytorch/vision/pull/2078#issuecomment-613524965 for details.
        eps = 1e-3
        max_val = float(_FT._max_value(dtype))
        # We need to scale first since the conversion would otherwise turn the input range `[0.0, 1.0]` into the
        # discrete set `{0, 1}`.
        return image.mul(max_val + 1.0 - eps).to(dtype)
    else:
        max_input_val = float(_FT._max_value(image.dtype))

        # int to float
        if float_output:
            # `.to(dtype)` copies (dtype differs — equal dtypes returned early above),
            # so the in-place `div_` never mutates the caller's tensor.
            return image.to(dtype).div_(max_input_val)

        # int to int
        # TODO: The `factor`'s below are by definition powers of 2. Instead of multiplying and dividing the inputs to
        #  get to the desired value range, we can probably speed this up significantly with bitshifts. However, we
        #  probably need to be careful when converting from signed to unsigned dtypes and vice versa.
        max_output_val = float(_FT._max_value(dtype))

        if max_input_val > max_output_val:
            # We technically don't need to convert to `int` here, but it speeds the division
            factor = int((max_input_val + 1) / (max_output_val + 1))
            # We need to scale first since the output dtype cannot hold all values in the input range
            return image.div(factor, rounding_mode="floor").to(dtype)
        else:
            # We need to convert to `int` or otherwise the multiplication will turn the image into floating point. Or,
            # to be more exact, the inplace multiplication will fail.
            factor = int((max_output_val + 1) / (max_input_val + 1))
            return image.to(dtype).mul_(factor)