Skip to content

Commit 3d4c2f5

Browse files
authored
Merge pull request #129 from fixie-ai/juberti/baseten2
Update baseten to v0.4
2 parents: 6bf7237 + 1ababbc; commit: 3d4c2f5

File tree

2 files changed: 6 additions (+6) and 3 deletions (-3).

llm_benchmark_suite.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -492,9 +492,9 @@ def _audio_models():
492492
_UltravoxLlm("fixie-ai/ultravox-v0.4", "ultravox-v0.4-8b"),
493493
_UltravoxLlm("fixie-ai/ultravox-70B", "ultravox-v0.4-70b"),
494494
_Llm(
495-
"fixie-ai/ultravox-v0.2",
496-
"baseten.co/ultravox-v0.2",
497-
base_url="https://bridge.baseten.co/5wovovzq/v1/direct",
495+
"ultravox",
496+
"baseten.co/ultravox-v0.4",
497+
base_url="https://bridge.baseten.co/v1/direct",
498498
api_key=os.getenv("BASETEN_API_KEY"),
499499
),
500500
]

llm_request.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -319,6 +319,9 @@ async def openai_chat(ctx: ApiContext, path: str = "/chat/completions") -> ApiRe
319319
# See https://github.com/Azure/azure-rest-api-specs/issues/25062
320320
if not any(p in ctx.name for p in ["azure", "databricks", "fireworks", "mistral"]):
321321
kwargs["stream_options"] = {"include_usage": True}
322+
# Hack to identify our baseten deployment, which isn't contained in the URL.
323+
if ctx.name.startswith("baseten"):
324+
kwargs["baseten"] = {"model_id": "rwn2v41w"}
322325
data = make_openai_chat_body(ctx, **kwargs)
323326
return await post(ctx, url, headers, data, openai_chunk_gen)
324327

0 commit comments

Comments
 (0)