Merge pull request #130 from fabienric/fix-ovh

juberti · web-flow · commit 7ac8bde8f20b · 2024-10-29T13:57:20.000-07:00
Fixes for the OVHcloud provider
diff --git a/README.md b/README.md
@@ -14,13 +14,15 @@ Large Language Models (LLMs):
   - Cloudflare
   - Groq
   - OctoAI
+  - OVHcloud
   - Perplexity
   - Together
 - Mixtral 8x7B from several different providers, including
   - Anyscale
   - Azure
   - Groq
   - OctoAI
+  - OVHcloud
   - Perplexity
 
 Embedding Models:
diff --git a/llm_benchmark_suite.py b/llm_benchmark_suite.py
@@ -225,7 +225,8 @@ class _OvhLlm(_Llm):
     def __init__(self, model: str, display_model: Optional[str] = None):
         super().__init__(
             "",
-            "cloud.ovh.net/" + display_model,
+            f"endpoints.ai.cloud.ovh.net/{model}",
+            api_key=os.getenv("OVH_AI_ENDPOINTS_API_KEY"),
             base_url=f"https://{model}.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1",
         )
 
@@ -352,6 +353,7 @@ def _text_models():
         _GroqLlm("mixtral-8x7b-32768", MIXTRAL_8X7B_INSTRUCT_FP8),
         _NvidiaLlm("mistralai/mixtral-8x7b-instruct-v0.1-turbo", MIXTRAL_8X7B_INSTRUCT_FP8),
         _TogetherLlm("mistralai/Mixtral-8x7B-Instruct-v0.1", MIXTRAL_8X7B_INSTRUCT),
+        _OvhLlm("mixtral-8x7b-instruct-v01", MIXTRAL_8X7B_INSTRUCT),
         # Llama 3.1 405b
         _DatabricksLlm("databricks-meta-llama-3.1-405b-instruct", LLAMA_31_405B_CHAT),
         _DeepInfraLlm(
@@ -365,7 +367,7 @@ def _text_models():
         _TogetherLlm(
             "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", LLAMA_31_405B_CHAT_FP8
         ),
-        # _OvhLlm("llama-3p1-405b-instruct", LLAMA_31_405B_CHAT),
+        # _OvhLlm("llama-3-1-405b-instruct", LLAMA_31_405B_CHAT),
         # Llama 3.1 70b
         _CerebrasLlm("llama3.1-70b", LLAMA_31_70B_CHAT),
         _CloudflareLlm("@cf/meta/llama-3.1-70b-preview", LLAMA_31_70B_CHAT),
@@ -380,7 +382,7 @@ def _text_models():
         _TogetherLlm(
             "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", LLAMA_31_70B_CHAT_FP8
         ),
-        # _OvhLlm("llama-3p1-8b-instruct", LLAMA_31_8B_CHAT),
+        _OvhLlm("llama-3-1-70b-instruct", LLAMA_31_70B_CHAT),
         # Llama 3.1 8b
         _CerebrasLlm("llama3.1-8b", LLAMA_31_8B_CHAT),
         _CloudflareLlm("@cf/meta/llama-3.1-8b-preview", LLAMA_31_8B_CHAT),
@@ -395,7 +397,7 @@ def _text_models():
         _TogetherLlm(
             "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", LLAMA_31_8B_CHAT_FP8
         ),
-        # _OvhLlm("llama-3p1-70b-instruct", LLAMA_31_70B_CHAT),
+        # _OvhLlm("llama-3-1-8b-instruct", LLAMA_31_8B_CHAT),
         # Llama 3 70b
         _DatabricksLlm("databricks-meta-llama-3-70b-instruct", LLAMA_3_70B_CHAT),
         _DeepInfraLlm("meta-llama/Meta-Llama-3-70B-Instruct", LLAMA_3_70B_CHAT),
diff --git a/llm_request.py b/llm_request.py
@@ -209,10 +209,18 @@ def make_openai_url_and_headers(ctx: ApiContext, path: str):
     url = ctx.base_url or "https://api.openai.com/v1"
     hostname = urllib.parse.urlparse(url).hostname
     use_azure_openai = hostname and hostname.endswith("openai.azure.com")
+    use_ovh = hostname and hostname.endswith("cloud.ovh.net")
     if use_azure_openai:
         api_key = get_api_key(ctx, "AZURE_OPENAI_API_KEY")
         headers = make_headers(api_key=api_key)
         url += f"/openai/deployments/{ctx.model.replace('.', '')}{path}?api-version={AZURE_OPENAI_API_VERSION}"
+    elif use_ovh:
+        api_key = get_api_key(ctx, "OVH_AI_ENDPOINTS_API_KEY")
+        headers = {
+            "content-type": "application/json",
+            "authorization": api_key
+        }
+        url += path
     else:
         api_key = ctx.api_key if ctx.base_url else get_api_key(ctx, "OPENAI_API_KEY")
         headers = make_headers(auth_token=api_key)