Commit c328ae1

FEAT:support MiniCPM4 Series (#3609)
1 parent b139700 commit c328ae1

File tree

3 files changed: +100 -0 lines changed

xinference/model/llm/llm_family.json

Lines changed: 47 additions & 0 deletions
@@ -6142,6 +6142,53 @@
             "</s>"
         ]
     },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "minicpm4",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM4 series are highly efficient large language models (LLMs) designed explicitly for end-side devices, which achieves this efficiency through systematic innovation in four key dimensions: model architecture, training data, training algorithms, and inference systems.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "0_5",
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "openbmb/MiniCPM4-0.5B"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "openbmb/MiniCPM4-8B"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "4bit"
+                ],
+                "model_id": "mlx-community/MiniCPM4-8B-4bit"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            2,
+            73440
+        ],
+        "stop": [
+            "</s>",
+            "<|im_end|>"
+        ]
+    },
     {
         "version": 1,
         "context_length": 32768,

xinference/model/llm/llm_family_modelscope.json

Lines changed: 50 additions & 0 deletions
@@ -4277,6 +4277,56 @@
             "</s>"
         ]
     },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "minicpm4",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM4 series are highly efficient large language models (LLMs) designed explicitly for end-side devices, which achieves this efficiency through systematic innovation in four key dimensions: model architecture, training data, training algorithms, and inference systems.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "0_5",
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "OpenBMB/MiniCPM4-0.5B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "OpenBMB/MiniCPM4-8B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "4bit"
+                ],
+                "model_id": "mlx-community/MiniCPM4-8B-4bit",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            2,
+            73440
+        ],
+        "stop": [
+            "</s>",
+            "<|im_end|>"
+        ]
+    },
     {
         "version": 1,
         "context_length": 32768,

xinference/model/llm/vllm/core.py

Lines changed: 3 additions & 0 deletions
@@ -252,6 +252,9 @@ class VLLMGenerateConfig(TypedDict, total=False):
 if VLLM_INSTALLED and vllm.__version__ >= "0.8.5":
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen3")
 
+if VLLM_INSTALLED and vllm.__version__ >= "0.9.1":
+    VLLM_SUPPORTED_CHAT_MODELS.append("minicpm4")
+
 
 class VLLMModel(LLM):
     def __init__(
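The gate mirrors the existing qwen3 line: "minicpm4" is only advertised to the vLLM backend when an installed vLLM reports version 0.9.1 or newer, and the check compares version strings directly. For reference only (not part of the commit), here is a sketch of the same guard written with packaging's Version, which compares releases numerically rather than lexicographically (as plain strings, "0.10.0" < "0.9.1"):

# Reference sketch only, not part of the commit: the same guard using
# packaging.version so that e.g. vLLM "0.10.0" still satisfies ">= 0.9.1".
from packaging.version import Version

import vllm  # assumed installed, as in the VLLM_INSTALLED branch of core.py

VLLM_SUPPORTED_CHAT_MODELS: list[str] = []  # stand-in for the list defined in core.py

if Version(vllm.__version__) >= Version("0.9.1"):
    VLLM_SUPPORTED_CHAT_MODELS.append("minicpm4")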
