Merge pull request #116 from fixie-ai/juberti/strict

juberti · web-flow · commit 8b2011ae0f4a · 2024-08-07T16:01:11.000-07:00
Add strict mode for GPT tool calls and update Ultravox to v0.3
diff --git a/llm_benchmark.py b/llm_benchmark.py
@@ -39,6 +39,9 @@
     action="append",
     help="JSON file defining tools that can be used",
 )
+parser.add_argument(
+    "--strict", action="store_true", help="Use strict mode when using tools"
+)
 parser.add_argument(
     "--model",
     "-m",
diff --git a/llm_benchmark_suite.py b/llm_benchmark_suite.py
@@ -431,6 +431,9 @@ def _tools_models():
         _Llm(GPT_4O),
         _Llm(GPT_4O_MINI),
         _Llm(GPT_4_TURBO),
+        _Llm(GPT_4O, GPT_4O + "-strict", strict=True),
+        _Llm(GPT_4O_MINI, GPT_4O_MINI + "-strict", strict=True),
+        _Llm(GPT_4_TURBO, GPT_4_TURBO + "-strict", strict=True),
         _Llm("claude-3-opus-20240229"),
         _Llm("claude-3-5-sonnet-20240620"),
         _Llm("claude-3-sonnet-20240229"),
@@ -476,7 +479,7 @@ def _audio_models():
         _Llm(GEMINI_1_5_PRO),
         _Llm(GEMINI_1_5_FLASH),
         _Llm(
-            "fixie-ai/ultravox-v0.2",
+            "fixie-ai/ultravox-v0.3",
             base_url="https://ultravox.api.fixie.ai/v1",
             api_key=os.getenv("ULTRAVOX_API_KEY"),
         ),
diff --git a/llm_request.py b/llm_request.py
@@ -83,6 +83,7 @@ class ApiContext:
     prompt: str
     files: List[InputFile]
     tools: List[Dict]
+    strict: bool
     temperature: float
     max_tokens: int
     detail: Optional[str] = None
@@ -99,6 +100,7 @@ def __init__(self, session, index, name, func, args, prompt, files, tools):
         self.prompt = prompt
         self.files = files
         self.tools = tools
+        self.strict = args.strict
         self.detail = args.detail
         self.temperature = args.temperature
         self.max_tokens = args.max_tokens
@@ -276,14 +278,19 @@ async def openai_chat(ctx: ApiContext, path: str = "/chat/completions") -> ApiRe
     url, headers = make_openai_url_and_headers(ctx, path)
     kwargs = {"messages": make_openai_messages(ctx)}
     if ctx.tools:
-        kwargs["tools"] = ctx.tools
+        kwargs["tools"] = tools = ctx.tools[:]
+        if ctx.strict:
+            for i, t in enumerate(tools):
+                fn = {**t["function"], "strict": True}
+                fn["parameters"] = {**fn["parameters"], "additionalProperties": False}
+                tools[i] = {**t, "function": fn}
         kwargs["tool_choice"] = "required"
     if ctx.peft:
         kwargs["peft"] = ctx.peft
     # Some providers require opt-in for stream stats, but some providers don't like this opt-in.
-    # Azure, ovh.net, and vLLM don't support stream stats at the moment.
+    # Regardless of opt-in, Azure and ovh.net don't return stream stats at the moment.
     # See https://github.com/Azure/azure-rest-api-specs/issues/25062
-    if not any(p in ctx.name for p in ["azure", "databricks", "fireworks", "ultravox"]):
+    if not any(p in ctx.name for p in ["azure", "databricks", "fireworks"]):
         kwargs["stream_options"] = {"include_usage": True}
     data = make_openai_chat_body(ctx, **kwargs)
     return await post(ctx, url, headers, data, openai_chunk_gen)
diff --git a/media/tools/flights.json b/media/tools/flights.json
@@ -12,7 +12,6 @@
         },
         "date": {
           "type": "string",
-          "format": "date",
           "description": "The date of the flight, e.g., 2024-06-17"
         }
       },

Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,6 @@`
`12`	`12`	`},`
`13`	`13`	`"date": {`
`14`	`14`	`"type": "string",`
`15`		`- "format": "date",`
`16`	`15`	`"description": "The date of the flight, e.g., 2024-06-17"`
`17`	`16`	`}`
`18`	`17`	`},`