Skip to content

Commit 98ff679

Browse files
ArthurZucker, claude
authored and committed
Fix release full (huggingface#45029)
* first part of the fix * fix torch imports * revert * fix: make from transformers import * work without torch - `is_torchvision_available`, `is_timm_available`, `is_torchaudio_available`, `is_torchao_available`, `is_accelerate_available` now return False when torch is not installed, since all these packages require torch - Add `@requires(backends=("torch",))` to `PI0Processor` (was missing, causing the lazy module to crash on import without torch) - Fix wrong availability guards: `is_vision_available` → `is_torchvision_available` in pixtral processor, `is_torch_available` in smolvlm processor - Wrap bare `import torch` / torchvision imports in `processing_sam3_video.py` - Quote `torch.Tensor` in return type annotation of `tokenization_mistral_common.py` - Wrap 66 `image_processing_pil_*.py` imports from torch-dependent counterparts in try/except with ImagesKwargs fallbacks; quote `torch.Tensor` annotations - Restore explicit `from transformers import *` check in CircleCI `check_repository_consistency` job Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com> * up * style of this * revert: remove src/models changes, keep only core import fixes The PIL image processor changes are too fragile (break on make fix-repo). Keep only the core fixes: - is_torchvision/timm/torchaudio/torchao/accelerate_available() check torch - CircleCI explicit import check - tokenization_mistral_common.py torch.Tensor annotation - processing_sam3_video.py conditional torch imports - processing_pixtral.py/processing_smolvlm.py availability guard fixes - PI0Processor @requires decorator Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com> * nit * the mega quidproquo * use rquires(backend * more pil fixes * fixes * temp update * up? * is this it? * style? * revert a bunch of ai shit * pi0 requires this * revert some stuffs * upd * the fix * yups * ah * up * up * fix * yes? 
* update * up * nits * up * up * order --------- Co-authored-by: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
1 parent 2d5c6a8 commit 98ff679

101 files changed

Lines changed: 478 additions & 768 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.circleci/config.yml

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -175,6 +175,28 @@ jobs:
175175
- store_artifacts:
176176
path: ~/transformers/installed.txt
177177
- run: make check-repository-consistency
178+
- run:
179+
name: "Test import with all backends (torch + PIL + torchvision)"
180+
command: python -c "from transformers import *" || (echo '🚨 import failed with all backends. Fix unprotected imports!! 🚨'; exit 1)
181+
- run:
182+
name: "Test import with torch only (no PIL, no torchvision)"
183+
command: |
184+
uv pip uninstall Pillow torchvision -q
185+
python -c "from transformers import *" || (echo '🚨 import failed with torch only (no PIL). Fix unprotected imports!! 🚨'; exit 1)
186+
uv pip install -e ".[quality]" -q
187+
- run:
188+
name: "Test import with PIL only (no torch, no torchvision)"
189+
command: |
190+
uv pip uninstall torch torchvision torchaudio -q
191+
python -c "from transformers import *" || (echo '🚨 import failed with PIL only (no torch). Fix unprotected imports!! 🚨'; exit 1)
192+
uv pip install -e ".[quality]" -q
193+
- run:
194+
name: "Test import with torch + PIL, no torchvision"
195+
command: |
196+
uv pip uninstall torchvision -q
197+
python -c "from transformers import *" || (echo '🚨 import failed with torch+PIL but no torchvision. Fix unprotected imports!! 🚨'; exit 1)
198+
uv pip install -e ".[quality]" -q
199+
178200
179201
workflows:
180202
version: 2

src/transformers/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -330,7 +330,7 @@
330330
name for name in dir(dummy_vision_objects) if not name.startswith("_")
331331
]
332332
else:
333-
_import_structure["image_processing_backends"] = ["PilBackend", "TorchvisionBackend"]
333+
_import_structure["image_processing_backends"] = ["PilBackend"]
334334
_import_structure["image_processing_base"] = ["ImageProcessingMixin"]
335335
_import_structure["image_processing_utils"] = ["BaseImageProcessor"]
336336
_import_structure["image_utils"] = ["ImageFeatureExtractionMixin"]
@@ -345,6 +345,8 @@
345345
name for name in dir(dummy_torchvision_objects) if not name.startswith("_")
346346
]
347347
else:
348+
_import_structure.setdefault("image_processing_backends", [])
349+
_import_structure["image_processing_backends"] += ["TorchvisionBackend"]
348350
_import_structure["video_processing_utils"] = ["BaseVideoProcessor"]
349351

350352
# PyTorch-backed objects

src/transformers/image_processing_backends.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,9 +58,8 @@
5858
is_torchvision_available,
5959
is_vision_available,
6060
logging,
61-
requires_backends,
6261
)
63-
from .utils.import_utils import is_rocm_platform, is_torchdynamo_compiling
62+
from .utils.import_utils import is_rocm_platform, is_torchdynamo_compiling, requires
6463

6564

6665
if is_vision_available():
@@ -81,11 +80,11 @@
8180
logger = logging.get_logger(__name__)
8281

8382

83+
@requires(backends=("torch", "torchvision"))
8484
class TorchvisionBackend(BaseImageProcessor):
8585
"""Torchvision backend for GPU-accelerated batched image processing."""
8686

8787
def __init__(self, **kwargs: Unpack[ImagesKwargs]):
88-
requires_backends(self, "torchvision")
8988
super().__init__(**kwargs)
9089
self._set_attributes(**kwargs)
9190

@@ -407,6 +406,7 @@ def _preprocess(
407406
return BatchFeature(data={"pixel_values": processed_images}, tensor_type=return_tensors)
408407

409408

409+
@requires(backends=("vision",))
410410
class PilBackend(BaseImageProcessor):
411411
"""PIL/NumPy backend for portable CPU-only image processing."""
412412

src/transformers/models/aria/image_processing_pil_aria.py

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -25,14 +25,12 @@
2525
get_image_size,
2626
)
2727
from ...processing_utils import Unpack
28-
from ...utils import TensorType, auto_docstring, is_torchvision_available
28+
from ...utils import TensorType, auto_docstring
29+
from ...utils.import_utils import requires
2930
from .image_processing_aria import AriaImageProcessorKwargs
3031

3132

32-
if is_torchvision_available():
33-
from torchvision.transforms.v2 import functional as tvF
34-
35-
33+
@requires(backends=("vision", "torch", "torchvision"))
3634
@auto_docstring
3735
class AriaImageProcessorPil(PilBackend):
3836
model_input_names = ["pixel_values", "pixel_mask", "num_crops"]
@@ -67,7 +65,7 @@ def _resize_for_patching(
6765
self,
6866
image: np.ndarray,
6967
target_resolution: tuple,
70-
resample: "PILImageResampling | tvF.InterpolationMode | int | None",
68+
resample: "PILImageResampling | int | None",
7169
) -> np.ndarray:
7270
"""Resize an image to a target resolution while maintaining aspect ratio."""
7371
new_height, new_width = get_patch_output_size(
@@ -92,7 +90,7 @@ def get_image_patches(
9290
image: np.ndarray,
9391
grid_pinpoints: list[list[int]],
9492
patch_size: int,
95-
resample: "PILImageResampling | tvF.InterpolationMode | int | None",
93+
resample: "PILImageResampling | int | None",
9694
) -> list[np.ndarray]:
9795
"""
9896
Process an image with variable resolutions by dividing it into patches.
@@ -104,7 +102,7 @@ def get_image_patches(
104102
A list of possible resolutions as (height, width) pairs.
105103
patch_size (`int`):
106104
Size of each square patch to divide the image into.
107-
resample (`PILImageResampling | tvF.InterpolationMode | int | None`):
105+
resample (`PILImageResampling | int | None`):
108106
Resampling filter to use when resizing.
109107
110108
Returns:
@@ -133,7 +131,7 @@ def _preprocess(
133131
min_image_size: int = 336,
134132
split_resolutions: list[list[int]] | None = None,
135133
split_image: bool = False,
136-
resample: "PILImageResampling | tvF.InterpolationMode | int | None" = None,
134+
resample: "PILImageResampling | int | None" = None,
137135
**kwargs,
138136
) -> BatchFeature:
139137
if max_image_size not in [490, 980]:

src/transformers/models/beit/image_processing_pil_beit.py

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,9 @@
1414
"""Image processor class for BEiT."""
1515

1616
import numpy as np
17+
import torch
18+
import torch.nn.functional as F
19+
from torchvision.transforms.v2 import functional as tvF
1720

1821
from ...image_processing_backends import PilBackend
1922
from ...image_processing_utils import BatchFeature
@@ -26,19 +29,13 @@
2629
SizeDict,
2730
)
2831
from ...processing_utils import Unpack
29-
from ...utils import TensorType, auto_docstring, is_torch_available, is_torchvision_available
32+
from ...utils import TensorType, auto_docstring, is_torch_available
33+
from ...utils.import_utils import requires
3034
from .image_processing_beit import BeitImageProcessorKwargs
3135

3236

33-
if is_torch_available():
34-
import torch
35-
import torch.nn.functional as F
36-
37-
if is_torchvision_available():
38-
from torchvision.transforms.v2 import functional as tvF
39-
40-
4137
@auto_docstring
38+
@requires(backends=("vision", "torch", "torchvision"))
4239
class BeitImageProcessorPil(PilBackend):
4340
"""PIL backend for BEiT with reduce_label support."""
4441

src/transformers/models/bridgetower/image_processing_pil_bridgetower.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -24,14 +24,12 @@
2424
SizeDict,
2525
)
2626
from ...processing_utils import Unpack
27-
from ...utils import TensorType, auto_docstring, is_torchvision_available
27+
from ...utils import TensorType, auto_docstring
28+
from ...utils.import_utils import requires
2829
from .image_processing_bridgetower import BridgeTowerImageProcessorKwargs, get_resize_output_image_size
2930

3031

31-
if is_torchvision_available():
32-
from torchvision.transforms.v2 import functional as tvF
33-
34-
32+
@requires(backends=("vision", "torch", "torchvision"))
3533
@auto_docstring
3634
class BridgeTowerImageProcessorPil(PilBackend):
3735
"""PIL backend for BridgeTower with custom resize and center_crop."""
@@ -59,7 +57,7 @@ def resize(
5957
self,
6058
image: np.ndarray,
6159
size: SizeDict,
62-
resample: "PILImageResampling | tvF.InterpolationMode | int | None",
60+
resample: "PILImageResampling | int | None",
6361
size_divisor: int = 32,
6462
**kwargs,
6563
) -> np.ndarray:
@@ -84,7 +82,7 @@ def _preprocess(
8482
images: list[np.ndarray],
8583
do_resize: bool,
8684
size: SizeDict,
87-
resample: "PILImageResampling | tvF.InterpolationMode | int | None",
85+
resample: "PILImageResampling | int | None",
8886
do_center_crop: bool,
8987
crop_size: SizeDict,
9088
do_rescale: bool,

src/transformers/models/conditional_detr/image_processing_pil_conditional_detr.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,7 @@
5757
logging,
5858
requires_backends,
5959
)
60+
from ...utils.import_utils import requires
6061
from .image_processing_conditional_detr import (
6162
ConditionalDetrImageProcessorKwargs,
6263
compute_segments,
@@ -262,6 +263,7 @@ def prepare_coco_panoptic_annotation(
262263
return new_target
263264

264265

266+
@requires(backends=("vision", "torch", "torchvision"))
265267
@auto_docstring
266268
class ConditionalDetrImageProcessorPil(PilBackend):
267269
resample = PILImageResampling.BILINEAR
@@ -676,6 +678,7 @@ def _preprocess(
676678
]
677679
return encoded_inputs
678680

681+
@requires(backends=("vision", "torch"))
679682
def post_process_object_detection(
680683
self, outputs, threshold: float = 0.5, target_sizes: TensorType | list[tuple] = None, top_k: int = 100
681684
):
@@ -736,6 +739,7 @@ def post_process_object_detection(
736739

737740
return results
738741

742+
@requires(backends=("vision", "torch"))
739743
def post_process_semantic_segmentation(self, outputs, target_sizes: list[tuple[int, int]] | None = None):
740744
"""
741745
Converts the output of [`ConditionalDetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch.
@@ -784,6 +788,7 @@ def post_process_semantic_segmentation(self, outputs, target_sizes: list[tuple[i
784788

785789
return semantic_segmentation
786790

791+
@requires(backends=("vision", "torch"))
787792
def post_process_instance_segmentation(
788793
self,
789794
outputs,
@@ -872,6 +877,7 @@ def post_process_instance_segmentation(
872877
results.append({"segmentation": segmentation, "segments_info": segments})
873878
return results
874879

880+
@requires(backends=("vision", "torch"))
875881
def post_process_panoptic_segmentation(
876882
self,
877883
outputs,

src/transformers/models/conditional_detr/modular_conditional_detr.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
requires_backends,
3535
)
3636
from ...utils.generic import can_return_tuple, merge_with_config_defaults
37+
from ...utils.import_utils import requires
3738
from ...utils.output_capturing import OutputRecorder, capture_outputs
3839
from ..deformable_detr.modeling_deformable_detr import inverse_sigmoid
3940
from ..detr.image_processing_detr import DetrImageProcessor
@@ -173,6 +174,7 @@ def post_process_semantic_segmentation(self, outputs, target_sizes: list[tuple[i
173174

174175

175176
class ConditionalDetrImageProcessorPil(DetrImageProcessorPil):
177+
@requires(backends=("vision", "torch"))
176178
def post_process_object_detection(
177179
self, outputs, threshold: float = 0.5, target_sizes: TensorType | list[tuple] = None, top_k: int = 100
178180
):
@@ -233,6 +235,7 @@ def post_process_object_detection(
233235

234236
return results
235237

238+
@requires(backends=("vision", "torch"))
236239
def post_process_semantic_segmentation(self, outputs, target_sizes: list[tuple[int, int]] | None = None):
237240
"""
238241
Converts the output of [`ConditionalDetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch.

src/transformers/models/convnext/image_processing_pil_convnext.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -26,14 +26,12 @@
2626
SizeDict,
2727
)
2828
from ...processing_utils import Unpack
29-
from ...utils import TensorType, auto_docstring, is_torchvision_available
29+
from ...utils import TensorType, auto_docstring
30+
from ...utils.import_utils import requires
3031
from .image_processing_convnext import ConvNextImageProcessorKwargs
3132

3233

33-
if is_torchvision_available():
34-
from torchvision.transforms.v2 import functional as tvF
35-
36-
34+
@requires(backends=("vision", "torch", "torchvision"))
3735
@auto_docstring
3836
class ConvNextImageProcessorPil(PilBackend):
3937
"""PIL backend for ConvNeXT with custom resize."""
@@ -57,7 +55,7 @@ def resize(
5755
self,
5856
image: np.ndarray,
5957
size: SizeDict,
60-
resample: "PILImageResampling | tvF.InterpolationMode | int | None",
58+
resample: "PILImageResampling | int | None",
6159
crop_pct: float = 224 / 256,
6260
**kwargs,
6361
) -> np.ndarray:
@@ -98,7 +96,7 @@ def _preprocess(
9896
images: list[np.ndarray],
9997
do_resize: bool,
10098
size: SizeDict,
101-
resample: "PILImageResampling | tvF.InterpolationMode | int | None",
99+
resample: "PILImageResampling | int | None",
102100
do_center_crop: bool,
103101
crop_size: SizeDict,
104102
do_rescale: bool,

src/transformers/models/deepseek_vl/image_processing_pil_deepseek_vl.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,9 +34,11 @@
3434
)
3535
from ...processing_utils import Unpack
3636
from ...utils import TensorType, auto_docstring
37+
from ...utils.import_utils import requires
3738
from .image_processing_deepseek_vl import DeepseekVLImageProcessorKwargs
3839

3940

41+
@requires(backends=("vision", "torch", "torchvision"))
4042
@auto_docstring
4143
class DeepseekVLImageProcessorPil(PilBackend):
4244
resample = PILImageResampling.BICUBIC

0 commit comments

Comments
 (0)