23
23
from typing import Dict
24
24
from typing import Generator
25
25
from typing import Iterable
26
+ from typing import List
26
27
from typing import Literal
27
28
from typing import Optional
28
29
from typing import Tuple
@@ -481,16 +482,22 @@ def _message_to_generate_content_response(
481
482
482
483
def _get_completion_inputs (
483
484
llm_request : LlmRequest ,
484
- ) -> tuple [Iterable [Message ], Iterable [dict ]]:
485
- """Converts an LlmRequest to litellm inputs.
485
+ ) -> Tuple [
486
+ List [Message ],
487
+ Optional [List [Dict ]],
488
+ Optional [types .SchemaUnion ],
489
+ Optional [Dict ],
490
+ ]:
491
+ """Converts an LlmRequest to litellm inputs and extracts generation params.
486
492
487
493
Args:
488
494
llm_request: The LlmRequest to convert.
489
495
490
496
Returns:
491
- The litellm inputs (message list, tool dictionary and response format).
497
+ The litellm inputs (message list, tool dictionary, response format, and generation params).
492
498
"""
493
- messages = []
499
+ # 1. Construct messages
500
+ messages : List [Message ] = []
494
501
for content in llm_request .contents or []:
495
502
message_param_or_list = _content_to_message_param (content )
496
503
if isinstance (message_param_or_list , list ):
@@ -507,7 +514,8 @@ def _get_completion_inputs(
507
514
),
508
515
)
509
516
510
- tools = None
517
+ # 2. Convert tool declarations
518
+ tools : Optional [List [Dict ]] = None
511
519
if (
512
520
llm_request .config
513
521
and llm_request .config .tools
@@ -518,12 +526,39 @@ def _get_completion_inputs(
518
526
for tool in llm_request .config .tools [0 ].function_declarations
519
527
]
520
528
521
- response_format = None
529
+ # 3. Handle response format
530
+ response_format : Optional [types .SchemaUnion ] = (
531
+ llm_request .config .response_schema if llm_request .config else None
532
+ )
533
+
534
+ # 4. Extract generation parameters
535
+ generation_params : Optional [Dict ] = None
536
+ if llm_request .config :
537
+ config_dict = llm_request .config .model_dump (exclude_none = True )
538
+ # Map the supported request config fields to LiteLLM completion
539
+ # parameters, following https://docs.litellm.ai/docs/completion/input.
540
+ generation_params = {}
541
+ param_mapping = {
542
+ "max_output_tokens" : "max_completion_tokens" ,
543
+ "stop_sequences" : "stop" ,
544
+ }
545
+ for key in (
546
+ "temperature" ,
547
+ "max_output_tokens" ,
548
+ "top_p" ,
549
+ "top_k" ,
550
+ "stop_sequences" ,
551
+ "presence_penalty" ,
552
+ "frequency_penalty" ,
553
+ ):
554
+ if key in config_dict :
555
+ mapped_key = param_mapping .get (key , key )
556
+ generation_params [mapped_key ] = config_dict [key ]
522
557
523
- if llm_request . config . response_schema :
524
- response_format = llm_request . config . response_schema
558
+ if not generation_params :
559
+ generation_params = None
525
560
526
- return messages , tools , response_format
561
+ return messages , tools , response_format , generation_params
527
562
528
563
529
564
def _build_function_declaration_log (
@@ -660,15 +695,23 @@ async def generate_content_async(
660
695
self ._maybe_append_user_content (llm_request )
661
696
logger .debug (_build_request_log (llm_request ))
662
697
663
- messages , tools , response_format = _get_completion_inputs (llm_request )
698
+ messages , tools , response_format , generation_params = (
699
+ _get_completion_inputs (llm_request )
700
+ )
664
701
665
702
completion_args = {
666
703
"model" : self .model ,
667
704
"messages" : messages ,
668
705
"tools" : tools ,
669
706
"response_format" : response_format ,
670
707
}
671
- completion_args .update (self ._additional_args )
708
+
709
+ # Merge additional arguments first, then generation parameters, so that generation parameters win on key collisions
710
+ if hasattr (self , "_additional_args" ) and self ._additional_args :
711
+ completion_args .update (self ._additional_args )
712
+
713
+ if generation_params :
714
+ completion_args .update (generation_params )
672
715
673
716
if stream :
674
717
text = ""
0 commit comments