Description
I'm using jina-embeddings-v3 deployed locally with Xinference. The model defaults to 1024 dimensions but supports several others, and I've been through the official docs multiple times without finding where to set this.
Or is it meant to be passed at request time when calling the API? Passing the truncate_dim parameter at deployment time, as shown in the screenshot, has no effect either: the log still shows truncate_dim as null.
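For reference, this is the kind of per-request call I would expect to work against the OpenAI-compatible endpoint Xinference exposes (a sketch assuming a default local deployment on port 9997; whether Xinference actually forwards the `dimensions` parameter to the model is exactly what I can't confirm):

```python
# Sketch: request a reduced dimension per call through Xinference's
# OpenAI-compatible /v1/embeddings endpoint.
# Assumption: Xinference may or may not forward `dimensions` to the model.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:9997/v1", api_key="not-used")

resp = client.embeddings.create(
    model="jina-embeddings-v3",
    input=["hello world"],
    dimensions=256,  # hoped-for truncation; ignored if not forwarded
)
print(len(resp.data[0].embedding))  # 256 if honored, otherwise 1024
```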
Screenshot
Deployment log
2025-05-20 16:05:25,968 xinference.core.worker 62294 INFO [request 2f67733c-3551-11f0-aefd-2a81d80ea049] Leave terminate_model, elapsed time: 0 s
2025-05-20 16:06:06,463 xinference.core.worker 62294 INFO [request 47f96a5e-3551-11f0-aefd-2a81d80ea049] Enter launch_builtin_model, args: <xinference.core.worker.WorkerActor object at 0x315bb4810>, kwargs: model_uid=jina-embeddings-v3-0,model_name=jina-embeddings-v3,model_size_in_billions=None,model_format=None,quantization=None,model_engine=None,model_type=embedding,n_gpu=None,request_limits=None,peft_model_config=None,gpu_idx=None,download_hub=None,model_path=None,xavier_config=None,truncate_dim=256,dimensions=256
2025-05-20 16:06:11,428 xinference.core.model 56963 INFO Start requests handler.
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
warn("The installed version of bitsandbytes was compiled without GPU support. "
'NoneType' object has no attribute 'cadam32bit_grad_fp32'
2025-05-20 16:06:12,000 transformers.configuration_utils 56963 INFO loading configuration file /Users/bingjiang/.xinference/cache/jina-embeddings-v3/config.json
2025-05-20 16:06:13,142 transformers.configuration_utils 56963 INFO loading configuration file /Users/bingjiang/.xinference/cache/jina-embeddings-v3/config.json
2025-05-20 16:06:13,143 transformers.configuration_utils 56963 INFO Model config XLMRobertaFlashConfig {
"architectures": [
"XLMRobertaModel"
],
"attention_probs_dropout_prob": 0.1,
"auto_map": {
"AutoConfig": "jinaai/xlm-roberta-flash-implementation--configuration_xlm_roberta.XLMRobertaFlashConfig",
"AutoModel": "jinaai/xlm-roberta-flash-implementation--modeling_lora.XLMRobertaLoRA",
"AutoModelForMaskedLM": "jinaai/xlm-roberta-flash-implementation--modeling_xlm_roberta.XLMRobertaForMaskedLM",
"AutoModelForPreTraining": "jinaai/xlm-roberta-flash-implementation--modeling_xlm_roberta.XLMRobertaForPreTraining"
},
"bos_token_id": 0,
"classifier_dropout": null,
"emb_pooler": null,
"eos_token_id": 2,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 4096,
"layer_norm_eps": 1e-05,
"load_trained_adapters": true,
"lora_adaptations": [
"retrieval.query",
"retrieval.passage",
"separation",
"classification",
"text-matching"
],
"lora_alpha": 1,
"lora_dropout_p": 0.0,
"lora_main_params_trainable": false,
"lora_rank": 4,
"matryoshka_dimensions": [
32,
64,
128,
256,
512,
768,
1024
],
"max_position_embeddings": 8194,
"model_type": "xlm-roberta",
"num_attention_heads": 16,
"num_hidden_layers": 24,
"output_past": true,
"pad_token_id": 1,
"position_embedding_type": "rotary",
"rotary_emb_base": 20000.0,
"task_instructions": {
"classification": "",
"retrieval.passage": "Represent the document for retrieval: ",
"retrieval.query": "Represent the query for retrieving evidence documents: ",
"separation": "",
"text-matching": ""
},
"torch_dtype": "float32",
"transformers_version": "4.51.3",
"truncate_dim": null,
"type_vocab_size": 1,
"use_cache": true,
"use_flash_attn": true,
"use_reentrant": false,
"vocab_size": 250002
}
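In the meantime, since the config above lists matryoshka_dimensions, a client-side workaround seems possible (a sketch of standard matryoshka truncation, not an Xinference feature): take the full 1024-dim embedding, keep the first N components, and L2-normalize the result.

```python
# Workaround sketch: truncate a matryoshka embedding client-side.
# jina-embeddings-v3 is trained so that prefixes of the vector
# (32/64/128/256/512/768/1024 dims, per matryoshka_dimensions) remain usable.
import numpy as np

def truncate_embedding(vec, dim=256):
    """Keep the first `dim` components and L2-normalize the result."""
    v = np.asarray(vec, dtype=np.float32)[:dim]
    norm = np.linalg.norm(v)
    return v / norm if norm > 0.0 else v

full = np.random.rand(1024)            # stand-in for a real 1024-dim embedding
small = truncate_embedding(full, 256)
print(small.shape)                     # (256,)
```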