Skip to content

Commit df6afff

Browse files
SangChengC, hiworldwzj, liujiacheng
authored
[opt]opti-qwen2-vl-vit (#1004)
Co-authored-by: hiworldwzj <[email protected]>
Co-authored-by: none <none>
Co-authored-by: liujiacheng <[email protected]>
1 parent 5b3e319 commit df6afff

File tree

10 files changed

+421
-775
lines changed

10 files changed

+421
-775
lines changed

lightllm/models/qwen2_5_vl/qwen2_5_visual.py

Lines changed: 96 additions & 171 deletions
Large diffs are not rendered by default.

lightllm/models/qwen2_vl/model.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,10 @@ class QWen2VLTokenizer(BaseMultiModalTokenizer):
3131
def __init__(self, tokenizer=None, image_processor=None, **kwargs):
3232
super().__init__(tokenizer)
3333
self.image_processor = image_processor
34+
self.min_pixel = self.image_processor.min_pixels
35+
self.max_pixel = self.image_processor.max_pixels
36+
self.patch_size = self.image_processor.patch_size
37+
self.merge_size = self.image_processor.merge_size
3438
self.image_start_id = kwargs["model_cfg"]["vision_start_token_id"]
3539
self.image_end_id = kwargs["model_cfg"]["vision_end_token_id"]
3640
self.image_token_id = kwargs["model_cfg"]["image_token_id"]
@@ -46,17 +50,13 @@ def init_audioitem_extral_params(
4650
raise NotImplementedError
4751

4852
def get_image_token_length(self, img: ImageItem):
49-
width = img.image_w
50-
height = img.image_h
51-
resized_height, resized_width = smart_resize(height=height, width=width)
52-
self.patch_size = self.image_processor.image_processor.patch_size
53-
self.merge_size = self.image_processor.image_processor.merge_size
54-
grid_t = 1
53+
width, height = img.image_w, img.image_h
54+
resized_height, resized_width = smart_resize(
55+
height=height, width=width, min_pixels=self.min_pixel, max_pixels=self.max_pixel
56+
)
5557
grid_h, grid_w = resized_height // self.patch_size, resized_width // self.patch_size
56-
merge_length = self.merge_size ** 2
57-
self.token_num = (grid_t * grid_h * grid_w) // merge_length
58-
self.image_length = self.token_num
59-
return self.image_length
58+
token_num = (grid_h * grid_w) // (self.merge_size ** 2)
59+
return token_num
6060

6161
def get_audio_token_length(self, audio: AudioItem):
6262
raise NotImplementedError

0 commit comments

Comments
 (0)