Skip to content

Commit 57f1fd3

Browse files
authored
fix: add dummy_prefill guard for PD connection operations (#3803)
1 parent 79c6a4d commit 57f1fd3

File tree

1 file changed: +4 -1 lines changed

lmdeploy/serve/proxy/proxy.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -640,7 +640,8 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
640 640               is_dummy_prefill=node_manager.dummy_prefill).model_dump(mode='json')
641 641
642 642       start = node_manager.pre_call(d_url)
643     -     node_manager.pd_connection_pool.shelf_prefill_session((p_url, d_url), prefill_info['id'])
    643 +     if not node_manager.dummy_prefill:
    644 +         node_manager.pd_connection_pool.shelf_prefill_session((p_url, d_url), prefill_info['id'])
644 645       if request.stream is True:
645 646           response = node_manager.stream_generate(request_dict, d_url, '/v1/chat/completions')
646 647           background_task = node_manager.create_background_tasks(d_url, start)
@@ -781,6 +782,8 @@ async def completions_v1(request: CompletionRequest, raw_request: Request = None
781 782               is_dummy_prefill=node_manager.dummy_prefill).model_dump(mode='json')
782 783
783 784       start = node_manager.pre_call(d_url)
    785 +     if not node_manager.dummy_prefill:
    786 +         node_manager.pd_connection_pool.shelf_prefill_session((p_url, d_url), prefill_info['id'])
784 787       if request.stream is True:
785 788           response = node_manager.stream_generate(request_dict, d_url, '/v1/completions')
786 789           background_task = node_manager.create_background_tasks(d_url, start)

0 commit comments

Comments
 (0)