@@ -31,6 +31,7 @@ class TestCLIPImageTransform:
         [
             {
                 "image_size": (100, 400, 3),
+                "image_type": "PIL.Image",
                 "expected_shape": torch.Size([2, 3, 224, 224]),
                 "resize_to_max_canvas": False,
                 "expected_tile_means": [0.2230, 0.1763],
@@ -40,6 +41,7 @@ class TestCLIPImageTransform:
             },
             {
                 "image_size": (1000, 300, 3),
+                "image_type": "PIL.Image",
                 "expected_shape": torch.Size([4, 3, 224, 224]),
                 "resize_to_max_canvas": True,
                 "expected_tile_means": [0.5007, 0.4995, 0.5003, 0.1651],
@@ -49,6 +51,7 @@ class TestCLIPImageTransform:
             },
             {
                 "image_size": (200, 200, 3),
+                "image_type": "PIL.Image",
                 "expected_shape": torch.Size([4, 3, 224, 224]),
                 "resize_to_max_canvas": True,
                 "expected_tile_means": [0.5012, 0.5020, 0.5011, 0.4991],
@@ -59,6 +62,48 @@ class TestCLIPImageTransform:
             },
             {
                 "image_size": (600, 200, 3),
+                "image_type": "torch.Tensor",
+                "expected_shape": torch.Size([3, 3, 224, 224]),
+                "resize_to_max_canvas": False,
+                "expected_tile_means": [0.4473, 0.4469, 0.3032],
+                "expected_tile_max": [1.0, 1.0, 1.0],
+                "expected_tile_min": [0.0, 0.0, 0.0],
+                "expected_aspect_ratio": [3, 1],
+            },
+            {
+                "image_size": (100, 400, 3),
+                "image_type": "torch.Tensor",
+                "expected_shape": torch.Size([2, 3, 224, 224]),
+                "resize_to_max_canvas": False,
+                "expected_tile_means": [0.2230, 0.1763],
+                "expected_tile_max": [1.0, 1.0],
+                "expected_tile_min": [0.0, 0.0],
+                "expected_aspect_ratio": [1, 2],
+            },
+            {
+                "image_size": (1000, 300, 3),
+                "image_type": "torch.Tensor",
+                "expected_shape": torch.Size([4, 3, 224, 224]),
+                "resize_to_max_canvas": True,
+                "expected_tile_means": [0.5007, 0.4995, 0.5003, 0.1651],
+                "expected_tile_max": [0.9705, 0.9694, 0.9521, 0.9314],
+                "expected_tile_min": [0.0353, 0.0435, 0.0528, 0.0],
+                "expected_aspect_ratio": [4, 1],
+            },
+            {
+                "image_size": (200, 200, 3),
+                "image_type": "torch.Tensor",
+                "expected_shape": torch.Size([4, 3, 224, 224]),
+                "resize_to_max_canvas": True,
+                "expected_tile_means": [0.5012, 0.5020, 0.5011, 0.4991],
+                "expected_tile_max": [0.9922, 0.9926, 0.9970, 0.9908],
+                "expected_tile_min": [0.0056, 0.0069, 0.0059, 0.0033],
+                "expected_aspect_ratio": [2, 2],
+                "pad_tiles": 1,
+            },
+            {
+                "image_size": (600, 200, 3),
+                "image_type": "torch.Tensor",
                 "expected_shape": torch.Size([3, 3, 224, 224]),
                 "resize_to_max_canvas": False,
                 "expected_tile_means": [0.4473, 0.4469, 0.3032],
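
For reference, the expected values in these cases follow a simple invariant: `expected_shape` is `(num_tiles, channels, tile_size, tile_size)` and `expected_aspect_ratio` appears to be the tile grid as `[rows, cols]`, so the leading dimension equals `rows * cols`. A small self-contained check over values copied from the new tensor-input cases (illustrative only, not part of the PR):

```python
import torch

# Expected outputs copied from the tensor-input cases above.
cases = [
    {"expected_shape": torch.Size([3, 3, 224, 224]), "expected_aspect_ratio": [3, 1]},
    {"expected_shape": torch.Size([2, 3, 224, 224]), "expected_aspect_ratio": [1, 2]},
    {"expected_shape": torch.Size([4, 3, 224, 224]), "expected_aspect_ratio": [4, 1]},
    {"expected_shape": torch.Size([4, 3, 224, 224]), "expected_aspect_ratio": [2, 2]},
]

for case in cases:
    n_tiles, channels, tile_h, tile_w = case["expected_shape"]
    rows, cols = case["expected_aspect_ratio"]
    assert (channels, tile_h, tile_w) == (3, 224, 224)  # fixed 224x224 RGB tiles
    assert n_tiles == rows * cols  # the tile grid accounts for the leading dimension
```
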
@@ -99,7 +144,10 @@ def test_clip_image_transform(self, params):
             .reshape(image_size)
             .astype(np.uint8)
         )
-        image = PIL.Image.fromarray(image)
+        if params["image_type"] == "PIL.Image":
+            image = PIL.Image.fromarray(image)
+        elif params["image_type"] == "torch.Tensor":
+            image = torch.from_numpy(image).permute(2, 0, 1)

         # Apply the transformation
         output = image_transform({"image": image})
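
The only behavioral change in the test body is how the synthetic input is materialized before it reaches the transform: the same HWC uint8 array is either wrapped as a PIL image or permuted into a CHW tensor. A minimal sketch of that branch in isolation (the `build_image` helper and the seeded generator are illustrative, not from the PR):

```python
import numpy as np
import PIL.Image
import torch


def build_image(image_size: tuple, image_type: str, seed: int = 0):
    """Build a deterministic HWC uint8 image and wrap it as the requested type."""
    rng = np.random.default_rng(seed)
    # Equivalent to randint(...).reshape(image_size).astype(np.uint8) in the test.
    array = rng.integers(0, 256, size=image_size, dtype=np.uint8)
    if image_type == "PIL.Image":
        return PIL.Image.fromarray(array)  # HWC PIL image
    elif image_type == "torch.Tensor":
        return torch.from_numpy(array).permute(2, 0, 1)  # HWC -> CHW tensor
    raise ValueError(f"Unsupported image_type: {image_type}")


pil_image = build_image((100, 400, 3), "PIL.Image")
tensor_image = build_image((100, 400, 3), "torch.Tensor")  # shape (3, 100, 400)
```

Either return value is then passed to the transform as `image_transform({"image": image})`, exactly as the test does.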