[prototype] Minor speed and nit optimizations on Transform Classes #6837


Merged (17 commits) on Oct 27, 2022
4 changes: 2 additions & 2 deletions torchvision/prototype/transforms/_geometry.py
@@ -522,7 +522,7 @@ def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
torch.randint(height - bound_height, height, size=(1,)).item(),
]
startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]]
- endpoints = [topleft, topright, botright, botleft]
+ endpoints: List[List[int]] = [topleft, topright, botright, botleft]  # type: ignore[list-item]
perspective_coeffs = _get_perspective_coeffs(startpoints, endpoints)
return dict(perspective_coeffs=perspective_coeffs)

@@ -622,7 +622,7 @@ def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:

while True:
# sample an option
- idx = torch.randint(low=0, high=len(self.options), size=(1,)).item()
+ idx: int = torch.randint(low=0, high=len(self.options), size=(1,)).item()  # type: ignore[assignment]
min_jaccard_overlap = self.options[idx]
if min_jaccard_overlap >= 1.0: # a value larger than 1 encodes the leave as-is option
return dict()
8 changes: 4 additions & 4 deletions torchvision/prototype/transforms/_type_conversion.py
@@ -1,4 +1,4 @@
- from typing import Any, cast, Dict, Optional, Union
+ from typing import Any, Dict, Optional, Union

import numpy as np
import PIL.Image
@@ -13,7 +13,7 @@ class DecodeImage(Transform):
_transformed_types = (features.EncodedImage,)

def _transform(self, inpt: torch.Tensor, params: Dict[str, Any]) -> features.Image:
- return cast(features.Image, F.decode_image_with_pil(inpt))
+ return F.decode_image_with_pil(inpt)  # type: ignore[no-any-return]

Collaborator commented:

This has to be here, because it seems

@torch.jit.unused
def decode_image_with_pil(encoded_image: torch.Tensor) -> features.Image:

doesn't "forward" the type annotations 🙄


@datumbox (Contributor, Author) commented on Oct 26, 2022:

In all other places we decided to silence with ignore rather than cast; do we really need the cast here?


Collaborator replied:

No no, I was just explaining why we need the ignore, for a future me who is staring confused at the blame, wondering why we introduced it in the first place.
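
A minimal, self-contained sketch of the pattern discussed above (not the torchvision code): untyped_decorator is a hypothetical stand-in for a decorator whose stub does not preserve the wrapped function's return annotation, which is what the thread describes for @torch.jit.unused. mypy therefore sees the call as returning Any and reports no-any-return, the error silenced in this diff.

    from typing import Any, cast


    def untyped_decorator(fn: Any) -> Any:  # hypothetical stand-in
        return fn


    @untyped_decorator
    def decode(data: bytes) -> str:  # mypy now sees `decode` as Any
        return data.decode("utf-8")


    def with_cast(data: bytes) -> str:
        # Option 1: cast the Any result back to the declared type.
        return cast(str, decode(data))


    def with_ignore(data: bytes) -> str:
        # Option 2, the one used in this diff: silence the mypy error in place.
        return decode(data)  # type: ignore[no-any-return]

Both versions type-check; the diff standardizes on the ignore comment, which also lets the cast import be dropped from _type_conversion.py.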



class LabelToOneHot(Transform):
@@ -27,7 +27,7 @@ def _transform(self, inpt: features.Label, params: Dict[str, Any]) -> features.O
num_categories = self.num_categories
if num_categories == -1 and inpt.categories is not None:
num_categories = len(inpt.categories)
- output = one_hot(inpt, num_classes=num_categories)
+ output = one_hot(inpt.as_subclass(torch.Tensor), num_classes=num_categories)
return features.OneHotLabel(output, categories=inpt.categories)

def extra_repr(self) -> str:
@@ -50,7 +50,7 @@ class ToImageTensor(Transform):
def _transform(
self, inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray], params: Dict[str, Any]
) -> features.Image:
- return cast(features.Image, F.to_image_tensor(inpt))
+ return F.to_image_tensor(inpt)  # type: ignore[no-any-return]


class ToImagePIL(Transform):
2 changes: 1 addition & 1 deletion torchvision/prototype/transforms/_utils.py
@@ -97,7 +97,7 @@ def _check_padding_mode_arg(padding_mode: Literal["constant", "edge", "reflect",


def query_bounding_box(flat_inputs: List[Any]) -> features.BoundingBox:
- bounding_boxes = {inpt for inpt in flat_inputs if isinstance(inpt, features.BoundingBox)}
+ bounding_boxes = [inpt for inpt in flat_inputs if isinstance(inpt, features.BoundingBox)]

@datumbox (Contributor, Author) commented:

I think we can use a list here instead of a set.


Collaborator replied:

Is this a perf optimization? Otherwise I would prefer the set, since:

  1. it makes it clearer that we are looking for duplicates, and
  2. it is aligned with the other query_* functions below.

Functionally, the only difference is that with a set, passing the same bounding box twice in the same sample would not be caught. This indeed seems like a user error, but if we raise there, we should probably check for duplicates everywhere. I'm not sure we want to single out bounding boxes here.


@datumbox (Contributor, Author) replied:

It's both perf and code correctness. What we want here is to ensure there is only one bbox in the input. If there were two, even if they were the exact same copy, our transforms that rely on query_bounding_box would end up working incorrectly: effectively they would modify the first copy but not the second. This is a different use case from the rest of the query_* methods, where we can handle multiple entries but just want to ensure the same size across them.


Collaborator replied:

> It's both perf and code correctness.

Are lists actually faster than sets? I'm honestly curious, since I have never benchmarked this 😇

> If there were two, even if they were the exact same copy, our transforms that rely on query_bounding_box would end up working incorrectly: effectively they would modify the first copy but not the second.

Agreed.

> This is a different use case from the rest of the query_* methods, where we can handle multiple entries but just want to ensure the same size across them.

You are right; I was not clear enough. What I meant was: we are not checking for duplicate images, videos, masks, etc. anywhere. Either we should do that (it seems like the right thing to do, but it probably has perf implications) or we shouldn't single out bounding boxes here. No strong opinion though; a single mitigation is probably better than none. But we should still discuss whether we need to check this for everything.


@datumbox (Contributor, Author) replied:

As discussed offline, query_chw and query_spatial_size must use sets because what we are interested in is enforcing that all images have a single size. Multiple images/videos are allowed in those cases; we just enforce that the size is the same everywhere.

On the other hand, query_bounding_box is used to extract the single bbox. The transforms that use this util handle only one bbox, which is why we need a list. Note that this is identical to _flatten_and_extract_image_or_video from AA, where we also use a list.

So this is primarily a bug fix that restores the right semantics of the function, with secondary positive performance implications.

if not bounding_boxes:
raise TypeError("No bounding box was found in the sample")
elif len(bounding_boxes) > 1:
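
To make the list-vs-set difference from the thread above concrete, here is a minimal, runnable sketch of the function's semantics; BoundingBox is a plain stand-in class for features.BoundingBox, and the duplicate-error message is illustrative since the real one is cut off in the diff above.

    from typing import Any, List


    class BoundingBox:  # stand-in for features.BoundingBox
        pass


    def query_bounding_box(flat_inputs: List[Any]) -> BoundingBox:
        # A list keeps every occurrence, so the same box passed twice is detected.
        bounding_boxes = [inpt for inpt in flat_inputs if isinstance(inpt, BoundingBox)]
        if not bounding_boxes:
            raise TypeError("No bounding box was found in the sample")
        elif len(bounding_boxes) > 1:
            raise TypeError("Only a single bounding box is supported")  # illustrative message
        return bounding_boxes[0]


    box = BoundingBox()
    query_bounding_box([box, "not a box"])  # returns box
    # query_bounding_box([box, box]) would raise, because the list keeps both
    # occurrences; a set comprehension would collapse {box, box} into a single
    # element and let the duplicate slip through the length check.

The perf benefit mentioned above is secondary: the list comprehension simply skips the hashing and deduplication work a set would do.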