Add Cerebras (#119)

mdepinet · web-flow · commit cb8aa46f85d3 · 2024-08-27T14:53:24.000-07:00
diff --git a/llm_benchmark.py b/llm_benchmark.py
@@ -240,7 +240,7 @@ def on_token(ctx: llm_request.ApiContext, token: str):
     task0_metrics = contexts[0].metrics
     if not chosen:
         if args.format == FMT_DEFAULT:
-            print(f"No successful API calls for {init_ctx.name}")
+            print(f"No successful API calls for {init_ctx.name}. Sample error: {task0_metrics.error}")
         return task0_metrics
 
     # Print results.
diff --git a/llm_benchmark_suite.py b/llm_benchmark_suite.py
@@ -248,6 +248,18 @@ def __init__(self, model: str, display_model: Optional[str] = None):
         )
 
 
+class _CerebrasLlm(_Llm):
+    """Cerebras-hosted model; key read from CEREBRAS_API_KEY. See https://docs.cerebras.ai/en/latest/wsc/Model-zoo/MZ-overview.html#list-of-models"""
+
+    def __init__(self, model: str, display_model: Optional[str] = None):
+        super().__init__(
+            model,
+            "api.cerebras.ai/" + (display_model or model),  # falls back to the raw model id when display_model is falsy
+            api_key=os.getenv("CEREBRAS_API_KEY"),  # None when the variable is unset
+            base_url="https://api.cerebras.ai/v1",
+        )
+
+
 def _text_models():
     AZURE_EASTUS2_OPENAI_API_KEY = os.getenv("AZURE_EASTUS2_OPENAI_API_KEY")
     return [
@@ -358,6 +370,7 @@ def _text_models():
         _TogetherLlm(
             "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", LLAMA_31_70B_CHAT_FP8
         ),
+        _CerebrasLlm("llama3.1-70b", LLAMA_31_70B_CHAT),
         # _OvhLlm("llama-3p1-8b-instruct", LLAMA_31_8B_CHAT),
         # Llama 3.1 8b
         # _DatabricksLlm("databricks-meta-llama-3.1-8b-instruct", LLAMA_31_8B_CHAT),
@@ -371,6 +384,7 @@ def _text_models():
         _TogetherLlm(
             "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", LLAMA_31_8B_CHAT_FP8
         ),
+        _CerebrasLlm("llama3.1-8b", LLAMA_31_8B_CHAT),
         # _OvhLlm("llama-3p1-70b-instruct", LLAMA_31_70B_CHAT),
         # Llama 3 70b
         _DatabricksLlm("databricks-meta-llama-3-70b-instruct", LLAMA_3_70B_CHAT),