[proto] Fixed F.perspective signature (#6617)

vfdev-5 · datumbox · web-flow · commit 2a5fbcdd3cfa · 2022-09-21T09:37:41.000+01:00
Co-authored-by: Vasilis Vryniotis <datumbox@users.noreply.github.com>
diff --git a/test/prototype_transforms_dispatcher_infos.py b/test/prototype_transforms_dispatcher_infos.py
@@ -113,21 +113,14 @@ def sample_inputs(self, *types):
             features.Mask: F.pad_mask,
         },
     ),
-    # FIXME:
-    # RuntimeError: perspective() is missing value for argument 'startpoints'.
-    # Declaration: perspective(Tensor inpt, int[][] startpoints, int[][] endpoints,
-    # Enum<__torch__.torchvision.transforms.functional.InterpolationMode> interpolation=Enum<InterpolationMode.BILINEAR>,
-    # Union(float[], float, int, NoneType) fill=None) -> Tensor
-    #
-    # This is probably due to the fact that F.perspective does not have the same signature as F.perspective_image_tensor
-    # DispatcherInfo(
-    #     F.perspective,
-    #     kernels={
-    #         features.Image: F.perspective_image_tensor,
-    #         features.BoundingBox: F.perspective_bounding_box,
-    #         features.Mask: F.perspective_mask,
-    #     },
-    # ),
+    DispatcherInfo(
+        F.perspective,
+        kernels={
+            features.Image: F.perspective_image_tensor,
+            features.BoundingBox: F.perspective_bounding_box,
+            features.Mask: F.perspective_mask,
+        },
+    ),
     DispatcherInfo(
         F.center_crop,
         kernels={
diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py
@@ -894,21 +894,8 @@ def test__get_params(self, mocker):
         params = transform._get_params(image)
 
         h, w = image.image_size
-        assert len(params["startpoints"]) == 4
-        for x, y in params["startpoints"]:
-            assert x in (0, w - 1)
-            assert y in (0, h - 1)
-
-        assert len(params["endpoints"]) == 4
-        for (x, y), name in zip(params["endpoints"], ["tl", "tr", "br", "bl"]):
-            if "t" in name:
-                assert 0 <= y <= int(dscale * h // 2), (x, y, name)
-            if "b" in name:
-                assert h - int(dscale * h // 2) - 1 <= y <= h, (x, y, name)
-            if "l" in name:
-                assert 0 <= x <= int(dscale * w // 2), (x, y, name)
-            if "r" in name:
-                assert w - int(dscale * w // 2) - 1 <= x <= w, (x, y, name)
+        assert "perspective_coeffs" in params
+        assert len(params["perspective_coeffs"]) == 8
 
     @pytest.mark.parametrize("distortion_scale", [0.1, 0.7])
     def test__transform(self, distortion_scale, mocker):
diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
@@ -1232,7 +1232,7 @@ def _compute_expected_bbox(bbox, pcoeffs_):
             np.max(transformed_points[:, 1]),
         ]
         out_bbox = features.BoundingBox(
-            out_bbox,
+            np.array(out_bbox),
             format=features.BoundingBoxFormat.XYXY,
             image_size=bbox.image_size,
             dtype=bbox.dtype,
diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py
@@ -9,6 +9,7 @@
 from torchvision.ops.boxes import box_iou
 from torchvision.prototype import features
 from torchvision.prototype.transforms import functional as F, InterpolationMode, Transform
+from torchvision.transforms.functional import _get_perspective_coeffs
 
 from typing_extensions import Literal
 
@@ -556,7 +557,8 @@ def _get_params(self, sample: Any) -> Dict[str, Any]:
         ]
         startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]]
         endpoints = [topleft, topright, botright, botleft]
-        return dict(startpoints=startpoints, endpoints=endpoints)
+        perspective_coeffs = _get_perspective_coeffs(startpoints, endpoints)
+        return dict(perspective_coeffs=perspective_coeffs)
 
     def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
         fill = self.fill[type(inpt)]
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
@@ -9,7 +9,6 @@
 from torchvision.transforms.functional import (
     _compute_resized_output_size,
     _get_inverse_affine_matrix,
-    _get_perspective_coeffs,
     InterpolationMode,
     pil_modes_mapping,
     pil_to_tensor,
@@ -876,13 +875,10 @@ def perspective_mask(
 
 def perspective(
     inpt: features.DType,
-    startpoints: List[List[int]],
-    endpoints: List[List[int]],
+    perspective_coeffs: List[float],
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     fill: Optional[Union[int, float, List[float]]] = None,
 ) -> features.DType:
-    perspective_coeffs = _get_perspective_coeffs(startpoints, endpoints)
-
     if isinstance(inpt, torch.Tensor) and (torch.jit.is_scripting() or not isinstance(inpt, features._Feature)):
         return perspective_image_tensor(inpt, perspective_coeffs, interpolation=interpolation, fill=fill)
     elif isinstance(inpt, features._Feature):

Original file line number	Diff line number	Diff line change
`@@ -1232,7 +1232,7 @@ def _compute_expected_bbox(bbox, pcoeffs_):`
`1232`	`1232`	`np.max(transformed_points[:, 1]),`
`1233`	`1233`	`]`
`1234`	`1234`	`out_bbox = features.BoundingBox(`
`1235`		`- out_bbox,`
	`1235`	`+ np.array(out_bbox),`
`1236`	`1236`	`format=features.BoundingBoxFormat.XYXY,`
`1237`	`1237`	`image_size=bbox.image_size,`
`1238`	`1238`	`dtype=bbox.dtype,`