Description
Hi,
I'm unable to run inference with the qwen2-vl-2B model on an Intel integrated GPU; it works fine if I select CPU as the device.
The exception is raised with OpenCL error code -5 (CL_OUT_OF_RESOURCES).
The stack trace:
File "ov_qwen2_vl.py", line 763, in forward
self.request.wait()
RuntimeError: Exception from src/inference/src/cpp/infer_request.cpp:245:
Exception from src/bindings/python/src/pyopenvino/core/infer_request.hpp:54:
Caught exception: Exception from src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp:365:
[GPU] clFlush, error code: -5
System details:
OS: Ubuntu 22.04.5 LTS (kernel: Linux 6.8.0-52-generic)
GPU: Intel Corporation HD Graphics 630 (VGA compatible controller)
import openvino as ov
core = ov.Core()
print(core.available_devices)  # output: ['CPU', 'GPU']
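For completeness, the GPU plugin's view of the device can be dumped like this (a minimal diagnostic sketch; GPU_DEVICE_TOTAL_MEM_SIZE is a read-only metric of the OpenVINO GPU plugin):
import openvino as ov

core = ov.Core()
# HD Graphics 630 has no dedicated VRAM, so the reported total is a share of system RAM.
print(core.get_property("GPU", "FULL_DEVICE_NAME"))
print(core.get_property("GPU", "GPU_DEVICE_TOTAL_MEM_SIZE"))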
Code snapshot:
from pathlib import Path
import requests
from ov_qwen2_vl import model_selector
model_id = model_selector()
print(f"Selected {model_id.value}")
pt_model_id = model_id.value
model_dir = Path(pt_model_id.split("/")[-1])
from ov_qwen2_vl import convert_qwen2vl_model
# Uncomment the line below to see the model conversion code
# convert_qwen2vl_model??
import nncf
compression_configuration = {
"mode": nncf.CompressWeightsMode.INT4_ASYM,
"group_size": 128,
"ratio": 1.0,
}
convert_qwen2vl_model(pt_model_id, model_dir, compression_configuration)
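For context, this dictionary is a standard NNCF weight-compression configuration; presumably convert_qwen2vl_model forwards it to nncf.compress_weights, roughly as in this sketch (ov_model is a placeholder for the converted language model, not a name from the notebook):
import nncf

# INT4 asymmetric quantization, one scale/zero-point per group of 128
# weight columns, applied to 100% of eligible layers (ratio=1.0).
compressed_model = nncf.compress_weights(
    ov_model,
    mode=nncf.CompressWeightsMode.INT4_ASYM,
    group_size=128,
    ratio=1.0,
)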
from ov_qwen2_vl import OVQwen2VLModel
# Uncomment the line below to see the model inference class code
# OVQwen2VLModel??
from notebook_utils import device_widget
device = device_widget(default="AUTO", exclude=["NPU"])
model = OVQwen2VLModel(model_dir, device.value)
print(device.value)  # "GPU" in the failing run
from PIL import Image
from transformers import AutoProcessor, AutoTokenizer
from qwen_vl_utils import process_vision_info
from transformers import TextStreamer
# Each Qwen2-VL visual token covers a 28x28 pixel area, so these bounds
# allow between 256 and 1280 visual tokens per image.
min_pixels = 256 * 28 * 28
max_pixels = 1280 * 28 * 28
processor = AutoProcessor.from_pretrained(model_dir, min_pixels=min_pixels, max_pixels=max_pixels)
if processor.chat_template is None:
    tok = AutoTokenizer.from_pretrained(model_dir)
    processor.chat_template = tok.chat_template
example_image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
example_image_path = Path("demo.jpeg")
if not example_image_path.exists():
    Image.open(requests.get(example_image_url, stream=True).raw).save(example_image_path)
image = Image.open(example_image_path)
question = "Describe this image."
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "image": f"file://{example_image_path}",
            },
            {"type": "text", "text": question},
        ],
    }
]
# Preparation for inference
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
)
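Since error -5 is resource-related, it may help to log the input tensor shapes right before generation (hypothetical debug lines, not part of the notebook); the number of image tokens drives GPU memory use:
for name, tensor in inputs.items():
    # expected keys: input_ids, attention_mask, pixel_values, image_grid_thw
    print(name, tuple(tensor.shape))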
#display(image)
print("Question:")
print(question)
print("Answer:")
generated_ids = model.generate(**inputs, max_new_tokens=100, streamer=TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True))
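If this turns out to be memory pressure on the iGPU, one low-risk experiment (an untested suggestion, not from the notebook) is to shrink the vision token budget and retry on GPU:
# Halving max_pixels roughly halves the image sequence length,
# since each visual token corresponds to a 28x28 pixel area.
processor = AutoProcessor.from_pretrained(model_dir, min_pixels=min_pixels, max_pixels=512 * 28 * 28)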