Commit ed2bd35

fix: crop testing, adhere to conventions
1 parent 2219ee5 commit ed2bd35

File tree

5 files changed: +51 -31 lines changed

test/test_transforms_v2.py

Lines changed: 40 additions & 23 deletions
@@ -3506,6 +3506,9 @@ def test_kernel_video(self):
             make_segmentation_mask,
             make_video,
             make_keypoints,
+            pytest.param(
+                make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
+            ),
         ],
     )
     def test_functional(self, make_input):
@@ -3521,6 +3524,11 @@ def test_functional(self, make_input):
             (F.crop_mask, tv_tensors.Mask),
             (F.crop_video, tv_tensors.Video),
             (F.crop_keypoints, tv_tensors.KeyPoints),
+            pytest.param(
+                F._geometry._crop_cvcuda,
+                _import_cvcuda().Tensor,
+                marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA"),
+            ),
         ],
     )
     def test_functional_signature(self, kernel, input_type):
@@ -3549,15 +3557,18 @@ def test_functional_image_correctness(self, kwargs):
             make_segmentation_mask,
             make_video,
             make_keypoints,
+            pytest.param(
+                make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
+            ),
         ],
     )
     def test_transform(self, param, value, make_input):
-        input = make_input(self.INPUT_SIZE)
+        input_data = make_input(self.INPUT_SIZE)
 
         check_sample_input = True
         if param == "fill":
             if isinstance(value, (tuple, list)):
-                if isinstance(input, tv_tensors.Mask):
+                if isinstance(input_data, tv_tensors.Mask):
                     pytest.skip("F.pad_mask doesn't support non-scalar fill.")
                 else:
                     check_sample_input = False
@@ -3566,14 +3577,14 @@ def test_transform(self, param, value, make_input):
                 # 1. size is required
                 # 2. the fill parameter only has an effect if we need padding
                 size=[s + 4 for s in self.INPUT_SIZE],
-                fill=adapt_fill(value, dtype=input.dtype if isinstance(input, torch.Tensor) else torch.uint8),
+                fill=adapt_fill(value, dtype=input_data.dtype if isinstance(input_data, torch.Tensor) else torch.uint8),
             )
         else:
             kwargs = {param: value}
 
         check_transform(
             transforms.RandomCrop(**kwargs, pad_if_needed=True),
-            input,
+            input_data,
             check_v1_compatibility=param != "fill" or isinstance(value, (int, float)),
             check_sample_input=check_sample_input,
         )
@@ -3637,6 +3648,31 @@ def test_transform_image_correctness(self, param, value, seed):
 
         assert_equal(actual, expected)
 
+    @pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
+    @pytest.mark.parametrize("size", [(10, 5), (25, 15), (25, 5), (10, 15), (10, 10)])
+    @pytest.mark.parametrize("seed", list(range(5)))
+    def test_transform_cvcuda_correctness(self, size, seed):
+        pad_if_needed = False
+        if size[0] > self.INPUT_SIZE[0] or size[1] > self.INPUT_SIZE[1]:
+            pad_if_needed = True
+        transform = transforms.RandomCrop(size, pad_if_needed=pad_if_needed)
+
+        image = make_image(size=self.INPUT_SIZE, batch_dims=(1,), device="cuda")
+        cv_image = F.to_cvcuda_tensor(image)
+
+        with freeze_rng_state():
+            torch.manual_seed(seed)
+            actual = transform(cv_image)
+
+            torch.manual_seed(seed)
+            expected = transform(image)
+
+        if not pad_if_needed:
+            torch.testing.assert_close(F.cvcuda_to_tensor(actual), expected, rtol=0, atol=0)
+        else:
+            # if padding is required, CV-CUDA will always fill with zeros
+            torch.testing.assert_close(F.cvcuda_to_tensor(actual), expected, rtol=0, atol=get_max_value(image.dtype))
+
     def _reference_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, width):
         affine_matrix = np.array(
             [
@@ -3765,25 +3801,6 @@ def test_errors(self):
             transforms.RandomCrop([10, 12], padding=1, padding_mode="abc")
 
 
-@pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="cvcuda not available")
-@needs_cuda
-class TestCropCVCUDA:
-    def test_functional(self):
-        check_functional(
-            F.crop, make_image_cvcuda(TestCrop.INPUT_SIZE, batch_dims=(1,)), **TestCrop.MINIMAL_CROP_KWARGS
-        )
-
-    def test_functional_signature(self):
-        check_functional_kernel_signature_match(F.crop, kernel=F.crop_cvcuda, input_type=cvcuda.Tensor)
-
-    @pytest.mark.parametrize("size", [(10, 5), (25, 15), (25, 5), (10, 15)])
-    def test_functional_correctness(self, size):
-        image = make_image_cvcuda(TestCrop.INPUT_SIZE, batch_dims=(1,))
-        actual = F.crop(image, 0, 0, *size)
-        expected = F.crop(F.cvcuda_to_tensor(image), 0, 0, *size)
-        assert_equal(F.cvcuda_to_tensor(actual), expected)
-
-
 class TestErase:
     INPUT_SIZE = (17, 11)
     FUNCTIONAL_KWARGS = dict(
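
A note on the new test_transform_cvcuda_correctness above: re-seeding the RNG before each call makes transforms.RandomCrop sample the same top/left offsets for the CV-CUDA input and the plain image, so the two outputs are comparable element-wise. A minimal sketch of that seeding pattern, using plain tensors so it runs without CV-CUDA:

import torch
from torchvision.transforms import v2 as transforms

# Same seed before each call -> RandomCrop draws identical crop parameters,
# so both calls crop the same region of the input.
image = torch.randint(0, 256, (1, 3, 17, 11), dtype=torch.uint8)
transform = transforms.RandomCrop((10, 5))

torch.manual_seed(0)
first = transform(image)
torch.manual_seed(0)
second = transform(image)
assert torch.equal(first, second)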

torchvision/transforms/v2/_transform.py

Lines changed: 5 additions & 3 deletions
@@ -11,7 +11,7 @@
 from torchvision.transforms.v2._utils import check_type, has_any, is_pure_tensor
 from torchvision.utils import _log_api_usage_once
 
-from .functional._utils import _get_kernel
+from .functional._utils import _get_kernel, is_cvcuda_tensor
 
 
 class Transform(nn.Module):
@@ -23,7 +23,7 @@ class Transform(nn.Module):
 
     # Class attribute defining transformed types. Other types are passed-through without any transformation
     # We support both Types and callables that are able to do further checks on the type of the input.
-    _transformed_types: tuple[type | Callable[[Any], bool], ...] = (torch.Tensor, PIL.Image.Image)
+    _transformed_types: tuple[type | Callable[[Any], bool], ...] = (torch.Tensor, PIL.Image.Image, is_cvcuda_tensor)
 
     def __init__(self) -> None:
         super().__init__()
@@ -90,7 +90,9 @@ def _needs_transform_list(self, flat_inputs: list[Any]) -> list[bool]:
         # However, this case wasn't supported by transforms v1 either, so there is no BC concern.
 
         needs_transform_list = []
-        transform_pure_tensor = not has_any(flat_inputs, tv_tensors.Image, tv_tensors.Video, PIL.Image.Image)
+        transform_pure_tensor = not has_any(
+            flat_inputs, tv_tensors.Image, tv_tensors.Video, PIL.Image.Image, is_cvcuda_tensor
+        )
         for inpt in flat_inputs:
             needs_transform = True
 
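Because _transformed_types accepts callables as well as plain types, is_cvcuda_tensor can act as the membership check for CV-CUDA inputs without importing cvcuda at module load. Its body is not part of this diff; a hypothetical sketch of such a predicate:

from typing import Any

def is_cvcuda_tensor(inpt: Any) -> bool:
    # Hypothetical sketch -- the real implementation lives in
    # torchvision/transforms/v2/functional/_utils.py and is not shown in this diff.
    try:
        import cvcuda  # lazy import: torchvision must keep working without CV-CUDA
    except ImportError:
        return False
    return isinstance(inpt, cvcuda.Tensor)
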
torchvision/transforms/v2/functional/__init__.py

Lines changed: 1 addition & 3 deletions
@@ -1,6 +1,6 @@
 from torchvision.transforms import InterpolationMode  # usort: skip
 
-from ._utils import is_pure_tensor, register_kernel  # usort: skip
+from ._utils import is_pure_tensor, register_kernel, is_cvcuda_tensor  # usort: skip
 
 from ._meta import (
     clamp_bounding_boxes,
@@ -76,14 +76,12 @@
     affine_video,
     center_crop,
     center_crop_bounding_boxes,
-    center_crop_cvcuda,
     center_crop_image,
     center_crop_keypoints,
     center_crop_mask,
     center_crop_video,
     crop,
     crop_bounding_boxes,
-    crop_cvcuda,
     crop_image,
     crop_keypoints,
     crop_mask,
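
Dropping crop_cvcuda and center_crop_cvcuda from the re-exports keeps backend-specific kernels private, in line with the conventions the commit message refers to; callers go through the generic dispatchers instead. A small sketch of the resulting import surface, assuming torchvision is built from this commit:

from torchvision.transforms.v2 import functional as F

assert not hasattr(F, "crop_cvcuda")   # no longer re-exported
assert hasattr(F, "is_cvcuda_tensor")  # newly re-exported from ._utils
assert hasattr(F, "crop")              # generic dispatcher, unchanged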

torchvision/transforms/v2/functional/_geometry.py

Lines changed: 4 additions & 2 deletions
@@ -1924,13 +1924,15 @@ def crop_video(video: torch.Tensor, top: int, left: int, height: int, width: int
     return crop_image(video, top, left, height, width)
 
 
-def crop_cvcuda(
+def _crop_cvcuda(
     image: "cvcuda.Tensor",
     top: int,
     left: int,
     height: int,
     width: int,
 ) -> "cvcuda.Tensor":
+    cvcuda = _import_cvcuda()
+
     image_height, image_width, channels = image.shape[1:]
     top_diff = 0
     left_diff = 0
@@ -1963,7 +1965,7 @@ def crop_cvcuda(
 
 
 if CVCUDA_AVAILABLE:
-    _register_kernel_internal(crop, cvcuda.Tensor)(crop_cvcuda)
+    _crop_cvcuda_registered = _register_kernel_internal(crop, _import_cvcuda().Tensor)(_crop_cvcuda)
 
 
 def perspective(
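
With the kernel renamed to _crop_cvcuda and registered through _register_kernel_internal, CV-CUDA inputs are still served by the public F.crop entry point. A usage sketch, assuming a CUDA device and the cvcuda package are available:

import torch
from torchvision.transforms.v2 import functional as F

image = torch.randint(0, 256, (1, 3, 32, 32), dtype=torch.uint8, device="cuda")
cv_image = F.to_cvcuda_tensor(image)  # convert to a cvcuda.Tensor
cropped = F.crop(cv_image, top=2, left=2, height=10, width=10)  # dispatches to _crop_cvcuda
assert F.cvcuda_to_tensor(cropped).shape[-2:] == (10, 10)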

torchvision/transforms/v2/functional/_utils.py

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@
 import torch
 from torchvision import tv_tensors
 
+
 _FillType = Union[int, float, Sequence[int], Sequence[float], None]
 _FillTypeJIT = Optional[list[float]]
 