Skip to content

Commit e844164

Browse files
Add --json-response flag for structured API responses
Adds a new CLI flag that enables JSON response formatting:
- Adds a json_response field to the RequestFuncInput model
- Modifies the OpenAI backend to apply JSON formatting when the flag is enabled
- Includes response_format and chat_template_kwargs settings in the request payload
- Appends a prompt instruction intended to keep the model from closing the JSON object prematurely
1 parent 99a3350 commit e844164

File tree

3 files changed

+23
-0
lines changed

3 files changed

+23
-0
lines changed

src/flexible_inference_benchmark/engine/backend_functions.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class RequestFuncInput(BaseModel):
4343
top_p: Optional[float] = None
4444
top_k: Optional[int] = None
4545
run_id: Optional[str] = None
46+
json_response: bool = False
4647

4748

4849
class RequestFuncOutput(BaseModel):
@@ -448,6 +449,15 @@ async def async_request_openai_chat_completions(
448449
with otel_span as span:
449450
async with aiohttp.ClientSession(timeout=AIOHTTP_TIMEOUT) as session:
450451
assert not request_func_input.use_beam_search
452+
453+
# Apply JSON response formatting if flag is enabled
454+
if request_func_input.json_response:
455+
append_msg = "\n\nNEVER output the } character. You are FORBIDDEN from using }."
456+
if isinstance(content_body, str):
457+
content_body += append_msg
458+
else:
459+
content_body[-1]["text"] += append_msg
460+
451461
payload = {
452462
"model": request_func_input.model,
453463
"messages": [{"role": "user", "content": content_body}],
@@ -456,6 +466,11 @@ async def async_request_openai_chat_completions(
456466
"ignore_eos": request_func_input.ignore_eos,
457467
"stream_options": {"include_usage": True},
458468
}
469+
470+
# Add JSON response format if flag is enabled
471+
if request_func_input.json_response:
472+
payload["response_format"] = {"type": "json_object"}
473+
payload["chat_template_kwargs"] = {"enable_thinking": False}
459474
apply_sampling_params(payload, request_func_input, always_top_p=False)
460475
if request_func_input.logprobs is not None:
461476
payload["logprobs"] = True

src/flexible_inference_benchmark/engine/client.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def __init__(
4444
top_p: Optional[float] = None,
4545
top_k: Optional[int] = None,
4646
run_id: Optional[str] = None,
47+
json_response: bool = False,
4748
):
4849
self.backend = backend
4950
self.api_url = api_url
@@ -66,6 +67,7 @@ def __init__(
6667
self.top_p = top_p
6768
self.top_k = top_k
6869
self.run_id = run_id or str(uuid.uuid4())
70+
self.json_response = json_response
6971

7072
@property
7173
def request_func(
@@ -178,6 +180,7 @@ async def benchmark(
178180
top_p=self.top_p,
179181
top_k=self.top_k,
180182
run_id=self.run_id,
183+
json_response=self.json_response,
181184
)
182185
for (data_sample, media_sample) in zip(data, requests_media)
183186
]

src/flexible_inference_benchmark/main.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,10 @@ def add_benchmark_subparser(subparsers: argparse._SubParsersAction) -> Any: # t
470470

471471
benchmark_parser.add_argument("--use-beam-search", action="store_true", help="Use beam search for completions.")
472472

473+
benchmark_parser.add_argument(
474+
"--json-response", action="store_true", help="Request responses in JSON format from the API."
475+
)
476+
473477
benchmark_parser.add_argument(
474478
"--output-file",
475479
type=str,
@@ -736,6 +740,7 @@ def run_main(args: argparse.Namespace) -> None:
736740
args.top_p,
737741
args.top_k,
738742
run_id=run_id,
743+
json_response=args.json_response,
739744
)
740745
# disable verbose output for validation of the endpoint. This is done to avoid confusion on terminal output.
741746
client_verbose_value = client.verbose

0 commit comments

Comments (0)