1
1
# SPDX-License-Identifier: Apache-2.0
2
2
"""Example Python client for `vllm.entrypoints.api_server`
3
+ Start the demo server:
4
+ python -m vllm.entrypoints.api_server --model <model_name>
5
+
3
6
NOTE: The API server is used only for demonstration and simple performance
4
7
benchmarks. It is not intended for production use.
5
8
For production use, we recommend `vllm serve` and the OpenAI client API.
6
9
"""
7
10
8
11
import argparse
9
12
import json
13
+ from argparse import Namespace
10
14
from collections .abc import Iterable
11
15
12
16
import requests
@@ -27,7 +31,6 @@ def post_http_request(prompt: str,
27
31
pload = {
28
32
"prompt" : prompt ,
29
33
"n" : n ,
30
- "use_beam_search" : True ,
31
34
"temperature" : 0.0 ,
32
35
"max_tokens" : 16 ,
33
36
"stream" : stream ,
@@ -55,14 +58,7 @@ def get_response(response: requests.Response) -> list[str]:
55
58
return output
56
59
57
60
58
- if __name__ == "__main__" :
59
- parser = argparse .ArgumentParser ()
60
- parser .add_argument ("--host" , type = str , default = "localhost" )
61
- parser .add_argument ("--port" , type = int , default = 8000 )
62
- parser .add_argument ("--n" , type = int , default = 4 )
63
- parser .add_argument ("--prompt" , type = str , default = "San Francisco is a" )
64
- parser .add_argument ("--stream" , action = "store_true" )
65
- args = parser .parse_args ()
61
+ def main (args : Namespace ):
66
62
prompt = args .prompt
67
63
api_url = f"http://{ args .host } :{ args .port } /generate"
68
64
n = args .n
@@ -83,3 +79,14 @@ def get_response(response: requests.Response) -> list[str]:
83
79
output = get_response (response )
84
80
for i , line in enumerate (output ):
85
81
print (f"Beam candidate { i } : { line !r} " , flush = True )
82
+
83
+
84
if __name__ == "__main__":
    # Script entry point: register the demo client's command-line options,
    # parse them, and hand the resulting namespace to main().
    # Each entry pairs a flag with the keyword arguments for add_argument;
    # options are registered in the order listed.
    cli_options = (
        ("--host", {"type": str, "default": "localhost"}),
        ("--port", {"type": int, "default": 8000}),
        ("--n", {"type": int, "default": 1}),
        ("--prompt", {"type": str, "default": "San Francisco is a"}),
        ("--stream", {"action": "store_true"}),
    )
    parser = argparse.ArgumentParser()
    for flag, settings in cli_options:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()
    main(args)
0 commit comments