@@ -640,7 +640,8 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
640
640
is_dummy_prefill = node_manager .dummy_prefill ).model_dump (mode = 'json' )
641
641
642
642
start = node_manager .pre_call (d_url )
643
- node_manager .pd_connection_pool .shelf_prefill_session ((p_url , d_url ), prefill_info ['id' ])
643
+ if not node_manager .dummy_prefill :
644
+ node_manager .pd_connection_pool .shelf_prefill_session ((p_url , d_url ), prefill_info ['id' ])
644
645
if request .stream is True :
645
646
response = node_manager .stream_generate (request_dict , d_url , '/v1/chat/completions' )
646
647
background_task = node_manager .create_background_tasks (d_url , start )
@@ -781,6 +782,8 @@ async def completions_v1(request: CompletionRequest, raw_request: Request = None
781
782
is_dummy_prefill = node_manager .dummy_prefill ).model_dump (mode = 'json' )
782
783
783
784
start = node_manager .pre_call (d_url )
785
+ if not node_manager .dummy_prefill :
786
+ node_manager .pd_connection_pool .shelf_prefill_session ((p_url , d_url ), prefill_info ['id' ])
784
787
if request .stream is True :
785
788
response = node_manager .stream_generate (request_dict , d_url , '/v1/completions' )
786
789
background_task = node_manager .create_background_tasks (d_url , start )
0 commit comments