
Commit d0394b7

Merge branch 'main' into fix-hardcoded-255
2 parents 402b01f + d8cec34 commit d0394b7

File tree: 7 files changed (+163, -53 lines)

.github/workflows/build-wheels-m1.yml

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+name: Build M1 Wheels
+
+on:
+  pull_request:
+  push:
+    branches:
+      - nightly
+  workflow_dispatch:
+
+jobs:
+  generate-matrix:
+    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+    with:
+      package-type: wheel
+      os: macos-arm64
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+  build:
+    needs: generate-matrix
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - repository: pytorch/vision
+            pre-script: ""
+            post-script: ""
+            smoke-test-script: test/smoke_test.py
+            package-name: torchvision
+    name: ${{ matrix.repository }}
+    uses: pytorch/test-infra/.github/workflows/build_wheels_macos.yml@main
+    with:
+      repository: ${{ matrix.repository }}
+      ref: ""
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
+      pre-script: ${{ matrix.pre-script }}
+      post-script: ${{ matrix.post-script }}
+      package-name: ${{ matrix.package-name }}
+      runner-type: macos-m1-12
+      smoke-test-script: ${{ matrix.smoke-test-script }}
+      # Using "development" as trigger event so these binaries are not uploaded
+      # to official channels yet
+      trigger-event: development
+    secrets:
+      AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }}
+      AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }}
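Note: the heavy lifting is delegated to reusable workflows in pytorch/test-infra; the only repository-specific pieces are the matrix entry and the smoke-test script. As a rough sketch of what a wheel smoke test such as the referenced test/smoke_test.py might check (hypothetical, not the actual script):

```python
# Hypothetical smoke-test sketch: import the freshly installed wheel and run
# a tiny forward pass to confirm the compiled ops load on the M1 runner.
import torch
import torchvision

print(torch.__version__, torchvision.__version__)

model = torchvision.models.resnet18(weights=None).eval()
with torch.inference_mode():
    out = model(torch.rand(1, 3, 224, 224))
assert out.shape == (1, 1000)
```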

test/prototype_transforms_kernel_infos.py

Lines changed: 23 additions & 0 deletions
@@ -862,6 +862,27 @@ def sample_inputs_crop_video():
     yield ArgsKwargs(video_loader, top=4, left=3, height=7, width=8)


+def reference_crop_bounding_box(bounding_box, *, format, top, left, height, width):
+
+    affine_matrix = np.array(
+        [
+            [1, 0, -left],
+            [0, 1, -top],
+        ],
+        dtype="float32",
+    )
+
+    expected_bboxes = reference_affine_bounding_box_helper(bounding_box, format=format, affine_matrix=affine_matrix)
+    return expected_bboxes, (height, width)
+
+
+def reference_inputs_crop_bounding_box():
+    for bounding_box_loader, params in itertools.product(
+        make_bounding_box_loaders(extra_dims=((), (4,))), [_CROP_PARAMS[0], _CROP_PARAMS[-1]]
+    ):
+        yield ArgsKwargs(bounding_box_loader, format=bounding_box_loader.format, **params)
+
+
 KERNEL_INFOS.extend(
     [
         KernelInfo(
@@ -875,6 +896,8 @@ def sample_inputs_crop_video():
         KernelInfo(
             F.crop_bounding_box,
             sample_inputs_fn=sample_inputs_crop_bounding_box,
+            reference_fn=reference_crop_bounding_box,
+            reference_inputs_fn=reference_inputs_crop_bounding_box,
         ),
         KernelInfo(
             F.crop_mask,
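The reference kernel expresses cropping as a pure translation by (-left, -top). A self-contained sketch of that mapping for a single XYXY box (reference_affine_bounding_box_helper in the test suite generalizes this across formats and batch dimensions; the helper below is illustrative only):

```python
import numpy as np

# Apply the same 2x3 translation matrix as above to both corners of one XYXY box.
def crop_xyxy_reference(box, top, left):
    affine_matrix = np.array([[1, 0, -left], [0, 1, -top]], dtype="float32")
    x1, y1, x2, y2 = box
    corners = np.array([[x1, y1, 1], [x2, y2, 1]], dtype="float32")  # homogeneous coords
    return (corners @ affine_matrix.T).reshape(-1)

print(crop_xyxy_reference([10, 20, 30, 40], top=4, left=3))  # [ 7. 16. 27. 36.]
```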

test/test_extended_models.py

Lines changed: 20 additions & 0 deletions
@@ -1,3 +1,4 @@
+import copy
 import os

 import pytest
@@ -59,6 +60,25 @@ def test_get_model_weights(name, weight):
     assert models.get_model_weights(name) == weight


+@pytest.mark.parametrize("copy_fn", [copy.copy, copy.deepcopy])
+@pytest.mark.parametrize(
+    "name",
+    [
+        "resnet50",
+        "retinanet_resnet50_fpn_v2",
+        "raft_large",
+        "quantized_resnet50",
+        "lraspp_mobilenet_v3_large",
+        "mvit_v1_b",
+    ],
+)
+def test_weights_copyable(copy_fn, name):
+    model_weights = models.get_model_weights(name)
+    for weights in list(model_weights):
+        copied_weights = copy_fn(weights)
+        assert copied_weights is weights
+
+
 @pytest.mark.parametrize(
     "module", [models, models.detection, models.quantization, models.segmentation, models.video, models.optical_flow]
 )
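The new test pins down singleton semantics: copying a weights entry must return the identical object, not an equal clone. Standard library enum members already behave this way, which is the behavior being asserted for WeightsEnum; a minimal sketch:

```python
import copy
import enum

class Color(enum.Enum):
    RED = 1

# Enum members are singletons: both copy flavors hand back the same object.
assert copy.copy(Color.RED) is Color.RED
assert copy.deepcopy(Color.RED) is Color.RED
```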

test/test_prototype_transforms_functional.py

Lines changed: 5 additions & 9 deletions
@@ -900,7 +900,8 @@ def test_correctness_center_crop_bounding_box(device, output_size):
     def _compute_expected_bbox(bbox, output_size_):
         format_ = bbox.format
         spatial_size_ = bbox.spatial_size
-        bbox = convert_format_bounding_box(bbox, format_, features.BoundingBoxFormat.XYWH)
+        dtype = bbox.dtype
+        bbox = convert_format_bounding_box(bbox.float(), format_, features.BoundingBoxFormat.XYWH)

         if len(output_size_) == 1:
             output_size_.append(output_size_[-1])
@@ -913,14 +914,9 @@ def test_correctness_center_crop_bounding_box(device, output_size):
             bbox[2].item(),
             bbox[3].item(),
         ]
-        out_bbox = features.BoundingBox(
-            out_bbox,
-            format=features.BoundingBoxFormat.XYWH,
-            spatial_size=output_size_,
-            dtype=bbox.dtype,
-            device=bbox.device,
-        )
-        return convert_format_bounding_box(out_bbox, features.BoundingBoxFormat.XYWH, format_)
+        out_bbox = torch.tensor(out_bbox)
+        out_bbox = convert_format_bounding_box(out_bbox, features.BoundingBoxFormat.XYWH, format_)
+        return out_bbox.to(dtype=dtype, device=bbox.device)

     for bboxes in make_bounding_boxes(extra_dims=((4,),)):
         bboxes = bboxes.to(device)
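The added .float() cast matters because format conversion on integer tensors can silently truncate half-pixel values, e.g. for center-based formats. A minimal sketch of the hazard (values illustrative):

```python
import torch

box = torch.tensor([2, 2, 5, 5])  # XYXY, int64
cx_int = (box[0] + box[2]) // 2                   # 3, truncated by integer math
cx_float = (box.float()[0] + box.float()[2]) / 2  # 3.5, exact
print(cx_int.item(), cx_float.item())
```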

torchvision/models/_api.py

Lines changed: 3 additions & 0 deletions
@@ -75,6 +75,9 @@ def __getattr__(self, name):
             return object.__getattribute__(self.value, name)
         return super().__getattr__(name)

+    def __deepcopy__(self, memodict=None):
+        return self
+

 def get_weight(name: str) -> WeightsEnum:
     """

torchvision/prototype/transforms/functional/_geometry.py

Lines changed: 32 additions & 17 deletions
@@ -4,6 +4,7 @@

 import PIL.Image
 import torch
+from torch.nn.functional import interpolate
 from torchvision.prototype import features
 from torchvision.transforms import functional_pil as _FP, functional_tensor as _FT
 from torchvision.transforms.functional import (
@@ -115,20 +116,37 @@ def resize_image_tensor(
     max_size: Optional[int] = None,
     antialias: bool = False,
 ) -> torch.Tensor:
+    align_corners: Optional[bool] = None
+    if interpolation == InterpolationMode.BILINEAR or interpolation == InterpolationMode.BICUBIC:
+        align_corners = False
+    elif antialias:
+        raise ValueError("Antialias option is supported for bilinear and bicubic interpolation modes only")
+
     shape = image.shape
     num_channels, old_height, old_width = shape[-3:]
     new_height, new_width = _compute_resized_output_size((old_height, old_width), size=size, max_size=max_size)

     if image.numel() > 0:
         image = image.reshape(-1, num_channels, old_height, old_width)

-        image = _FT.resize(
+        dtype = image.dtype
+        need_cast = dtype not in (torch.float32, torch.float64)
+        if need_cast:
+            image = image.to(dtype=torch.float32)
+
+        image = interpolate(
             image,
             size=[new_height, new_width],
-            interpolation=interpolation.value,
+            mode=interpolation.value,
+            align_corners=align_corners,
             antialias=antialias,
         )

+        if need_cast:
+            if interpolation == InterpolationMode.BICUBIC and dtype == torch.uint8:
+                image = image.clamp_(min=0, max=255)
+            image = image.round_().to(dtype=dtype)
+
     return image.reshape(shape[:-3] + (num_channels, new_height, new_width))
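resize_image_tensor now calls torch.nn.functional.interpolate directly instead of going through _FT.resize: non-float inputs are cast to float32 up front and rounded back afterwards, with an extra clamp for bicubic on uint8 since that mode can overshoot [0, 255]. A stripped-down sketch of the cast/resize/round round trip, assuming a uint8 input:

```python
import torch
from torch.nn.functional import interpolate

image = torch.randint(0, 256, (1, 3, 32, 32), dtype=torch.uint8)
out = interpolate(image.to(torch.float32), size=[64, 64], mode="bicubic", align_corners=False)
out = out.clamp_(0, 255).round_().to(torch.uint8)  # bicubic can overshoot the uint8 range
print(out.shape)  # torch.Size([1, 3, 64, 64])
```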

@@ -802,22 +820,17 @@ def crop_bounding_box(
     height: int,
     width: int,
 ) -> Tuple[torch.Tensor, Tuple[int, int]]:
-    # TODO: Investigate if it makes sense from a performance perspective to have an implementation for every
-    # BoundingBoxFormat instead of converting back and forth
-    bounding_box = convert_format_bounding_box(
-        bounding_box.clone(), old_format=format, new_format=features.BoundingBoxFormat.XYXY, inplace=True
-    )
+
+    bounding_box = bounding_box.clone()

     # Crop or implicit pad if left and/or top have negative values:
-    bounding_box[..., 0::2] -= left
-    bounding_box[..., 1::2] -= top
+    if format == features.BoundingBoxFormat.XYXY:
+        sub = torch.tensor([left, top, left, top], device=bounding_box.device)
+    else:
+        sub = torch.tensor([left, top, 0, 0], device=bounding_box.device)
+    bounding_box = bounding_box.sub_(sub)

-    return (
-        convert_format_bounding_box(
-            bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, inplace=True
-        ),
-        (height, width),
-    )
+    return bounding_box, (height, width)


 def crop_mask(mask: torch.Tensor, top: int, left: int, height: int, width: int) -> torch.Tensor:
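Rather than converting every box to XYXY and back, crop_bounding_box now subtracts a per-format offset in place: XYXY shifts all four coordinates, while XYWH/CXCYWH shift only the position fields and leave the sizes alone. A small sketch of the two cases (values illustrative):

```python
import torch

top, left = 4, 3
xyxy = torch.tensor([[10.0, 20.0, 30.0, 40.0]])
xywh = torch.tensor([[10.0, 20.0, 20.0, 20.0]])  # same box as x, y, w, h

print(xyxy - torch.tensor([left, top, left, top]))  # both corners move
print(xywh - torch.tensor([left, top, 0, 0]))       # only the origin moves
```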
@@ -1317,9 +1330,11 @@ def resized_crop(
 

 def _parse_five_crop_size(size: List[int]) -> List[int]:
     if isinstance(size, numbers.Number):
-        size = [int(size), int(size)]
+        s = int(size)
+        size = [s, s]
     elif isinstance(size, (tuple, list)) and len(size) == 1:
-        size = [size[0], size[0]]
+        s = size[0]
+        size = [s, s]

     if len(size) != 2:
         raise ValueError("Please provide only two dimensions (h, w) for size.")

torchvision/prototype/transforms/functional/_misc.py

Lines changed: 33 additions & 27 deletions
@@ -5,7 +5,6 @@
 import torch
 from torch.nn.functional import conv2d, pad as torch_pad
 from torchvision.prototype import features
-from torchvision.transforms import functional_tensor as _FT
 from torchvision.transforms.functional import pil_to_tensor, to_pil_image


@@ -68,9 +67,9 @@ def normalize(


 def _get_gaussian_kernel1d(kernel_size: int, sigma: float, dtype: torch.dtype, device: torch.device) -> torch.Tensor:
-    lim = (kernel_size - 1) / (2 * math.sqrt(2) * sigma)
+    lim = (kernel_size - 1) / (2.0 * math.sqrt(2.0) * sigma)
     x = torch.linspace(-lim, lim, steps=kernel_size, dtype=dtype, device=device)
-    kernel1d = torch.softmax(-x.pow_(2), dim=0)
+    kernel1d = torch.softmax(x.pow_(2).neg_(), dim=0)
     return kernel1d

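The softmax form works because softmax(-x^2) equals exp(-x^2) divided by its sum, which is exactly a normalized Gaussian once lim folds sigma into the grid spacing; rewriting -x.pow_(2) as x.pow_(2).neg_() keeps the negation in place instead of allocating a temporary. A quick sketch:

```python
import math
import torch

kernel_size, sigma = 5, 1.5
lim = (kernel_size - 1) / (2.0 * math.sqrt(2.0) * sigma)
x = torch.linspace(-lim, lim, steps=kernel_size)
kernel1d = torch.softmax(x.pow_(2).neg_(), dim=0)  # fully in place up to the softmax
print(kernel1d, kernel1d.sum())  # weights sum to 1
```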

@@ -89,54 +88,61 @@ def gaussian_blur_image_tensor(
     # TODO: consider deprecating integers from sigma on the future
     if isinstance(kernel_size, int):
         kernel_size = [kernel_size, kernel_size]
-    if len(kernel_size) != 2:
+    elif len(kernel_size) != 2:
         raise ValueError(f"If kernel_size is a sequence its length should be 2. Got {len(kernel_size)}")
     for ksize in kernel_size:
         if ksize % 2 == 0 or ksize < 0:
             raise ValueError(f"kernel_size should have odd and positive integers. Got {kernel_size}")

     if sigma is None:
         sigma = [ksize * 0.15 + 0.35 for ksize in kernel_size]
-
-    if sigma is not None and not isinstance(sigma, (int, float, list, tuple)):
-        raise TypeError(f"sigma should be either float or sequence of floats. Got {type(sigma)}")
-    if isinstance(sigma, (int, float)):
-        sigma = [float(sigma), float(sigma)]
-    if isinstance(sigma, (list, tuple)) and len(sigma) == 1:
-        sigma = [sigma[0], sigma[0]]
-    if len(sigma) != 2:
-        raise ValueError(f"If sigma is a sequence, its length should be 2. Got {len(sigma)}")
+    else:
+        if isinstance(sigma, (list, tuple)):
+            length = len(sigma)
+            if length == 1:
+                s = float(sigma[0])
+                sigma = [s, s]
+            elif length != 2:
+                raise ValueError(f"If sigma is a sequence, its length should be 2. Got {length}")
+        elif isinstance(sigma, (int, float)):
+            s = float(sigma)
+            sigma = [s, s]
+        else:
+            raise TypeError(f"sigma should be either float or sequence of floats. Got {type(sigma)}")
     for s in sigma:
         if s <= 0.0:
             raise ValueError(f"sigma should have positive values. Got {sigma}")

     if image.numel() == 0:
         return image

+    dtype = image.dtype
     shape = image.shape
-
-    if image.ndim > 4:
+    ndim = image.ndim
+    if ndim == 3:
+        image = image.unsqueeze(dim=0)
+    elif ndim > 4:
         image = image.reshape((-1,) + shape[-3:])
-        needs_unsquash = True
-    else:
-        needs_unsquash = False

-    dtype = image.dtype if torch.is_floating_point(image) else torch.float32
-    kernel = _get_gaussian_kernel2d(kernel_size, sigma, dtype=dtype, device=image.device)
-    kernel = kernel.expand(image.shape[-3], 1, kernel.shape[0], kernel.shape[1])
+    fp = torch.is_floating_point(image)
+    kernel = _get_gaussian_kernel2d(kernel_size, sigma, dtype=dtype if fp else torch.float32, device=image.device)
+    kernel = kernel.expand(shape[-3], 1, kernel.shape[0], kernel.shape[1])

-    image, need_cast, need_squeeze, out_dtype = _FT._cast_squeeze_in(image, [kernel.dtype])
+    output = image if fp else image.to(dtype=torch.float32)

     # padding = (left, right, top, bottom)
     padding = [kernel_size[0] // 2, kernel_size[0] // 2, kernel_size[1] // 2, kernel_size[1] // 2]
-    output = torch_pad(image, padding, mode="reflect")
-    output = conv2d(output, kernel, groups=output.shape[-3])
-
-    output = _FT._cast_squeeze_out(output, need_cast, need_squeeze, out_dtype)
+    output = torch_pad(output, padding, mode="reflect")
+    output = conv2d(output, kernel, groups=shape[-3])

-    if needs_unsquash:
+    if ndim == 3:
+        output = output.squeeze(dim=0)
+    elif ndim > 4:
         output = output.reshape(shape)

+    if not fp:
+        output = output.round_().to(dtype=dtype)
+
     return output
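The rewrite drops the private _FT._cast_squeeze_in/_cast_squeeze_out helpers in favor of explicit shape and dtype handling: 3-D inputs are unsqueezed to 4-D, higher-rank inputs flattened, non-float dtypes routed through float32, and results rounded back at the end. A condensed sketch of the pad-plus-depthwise-convolution core, assuming a float NCHW input and a precomputed kernel:

```python
import torch
from torch.nn.functional import conv2d, pad

image = torch.rand(1, 3, 16, 16)
kernel2d = torch.ones(5, 5) / 25                    # stand-in for the gaussian kernel
kernel = kernel2d.expand(image.shape[-3], 1, 5, 5)  # one filter per channel

padded = pad(image, [2, 2, 2, 2], mode="reflect")         # (left, right, top, bottom)
blurred = conv2d(padded, kernel, groups=image.shape[-3])  # depthwise: groups == C
print(blurred.shape)  # torch.Size([1, 3, 16, 16])
```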