@@ -219,24 +219,6 @@ def __init__(self, model: str, display_model: Optional[str] = None):
219
219
)
220
220
221
221
222
- class _OctoLlm (_Llm ):
223
- """See https://octo.ai/docs/getting-started/inference-models#serverless-endpoints"""
224
-
225
- def __init__ (
226
- self ,
227
- model : str ,
228
- display_model : Optional [str ] = None ,
229
- peft : Optional [str ] = None ,
230
- ):
231
- super ().__init__ (
232
- model ,
233
- "octo.ai/" + (display_model or model ),
234
- api_key = os .getenv ("OCTOML_API_KEY" ),
235
- base_url = "https://text.octoai.run/v1" ,
236
- peft = peft ,
237
- )
238
-
239
-
240
222
class _OvhLlm (_Llm ):
241
223
"""See https://llama-3-70b-instruct.endpoints.kepler.ai.cloud.ovh.net/doc"""
242
224
@@ -369,7 +351,6 @@ def _text_models():
369
351
),
370
352
_GroqLlm ("mixtral-8x7b-32768" , MIXTRAL_8X7B_INSTRUCT_FP8 ),
371
353
_NvidiaLlm ("mistralai/mixtral-8x7b-instruct-v0.1" , MIXTRAL_8X7B_INSTRUCT ),
372
- _OctoLlm ("mixtral-8x7b-instruct" , MIXTRAL_8X7B_INSTRUCT ),
373
354
_TogetherLlm ("mistralai/Mixtral-8x7B-Instruct-v0.1" , MIXTRAL_8X7B_INSTRUCT ),
374
355
# Llama 3.1 405b
375
356
_DatabricksLlm ("databricks-meta-llama-3.1-405b-instruct" , LLAMA_31_405B_CHAT ),
@@ -381,7 +362,6 @@ def _text_models():
381
362
),
382
363
_GroqLlm ("llama-3.1-405b-reasoning" , LLAMA_31_405B_CHAT_FP8 ),
383
364
_NvidiaLlm ("meta/llama-3.1-405b-instruct" , LLAMA_31_405B_CHAT ),
384
- _OctoLlm ("meta-llama-3.1-405b-instruct" , LLAMA_31_405B_CHAT ),
385
365
_TogetherLlm (
386
366
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo" , LLAMA_31_405B_CHAT_FP8
387
367
),
@@ -396,7 +376,6 @@ def _text_models():
396
376
),
397
377
_GroqLlm ("llama-3.1-70b-versatile" , LLAMA_31_70B_CHAT_FP8 ),
398
378
_NvidiaLlm ("meta/llama-3.1-70b-instruct" , LLAMA_31_70B_CHAT ),
399
- _OctoLlm ("meta-llama-3.1-70b-instruct" , LLAMA_31_70B_CHAT ),
400
379
_PerplexityLlm ("llama-3.1-70b-instruct" , LLAMA_31_70B_CHAT ),
401
380
_TogetherLlm (
402
381
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo" , LLAMA_31_70B_CHAT_FP8
@@ -412,7 +391,6 @@ def _text_models():
412
391
),
413
392
_GroqLlm ("llama-3.1-8b-instant" , LLAMA_31_8B_CHAT_FP8 ),
414
393
_NvidiaLlm ("meta/llama-3.1-8b-instruct" , LLAMA_31_8B_CHAT ),
415
- _OctoLlm ("meta-llama-3.1-8b-instruct" , LLAMA_31_8B_CHAT ),
416
394
_PerplexityLlm ("llama-3.1-8b-instruct" , LLAMA_31_8B_CHAT ),
417
395
_TogetherLlm (
418
396
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo" , LLAMA_31_8B_CHAT_FP8
@@ -429,7 +407,6 @@ def _text_models():
429
407
),
430
408
_GroqLlm ("llama3-70b-8192" , LLAMA_3_70B_CHAT_FP8 ),
431
409
_NvidiaLlm ("meta/llama3-70b-instruct" , LLAMA_3_70B_CHAT ),
432
- _OctoLlm ("meta-llama-3-70b-instruct" , LLAMA_3_70B_CHAT ),
433
410
_TogetherLlm ("meta-llama/Llama-3-70b-chat-hf" , LLAMA_3_70B_CHAT ),
434
411
_TogetherLlm (
435
412
"meta-llama/Meta-Llama-3-70B-Instruct-Turbo" , LLAMA_3_70B_CHAT_FP8
@@ -452,7 +429,6 @@ def _text_models():
452
429
),
453
430
_GroqLlm ("llama3-8b-8192" , LLAMA_3_8B_CHAT_FP8 ),
454
431
_NvidiaLlm ("meta/llama3-8b-instruct" , LLAMA_3_8B_CHAT ),
455
- _OctoLlm ("meta-llama-3-8b-instruct" , LLAMA_3_8B_CHAT ),
456
432
_TogetherLlm ("meta-llama/Llama-3-8b-chat-hf" , LLAMA_3_8B_CHAT ),
457
433
_TogetherLlm ("meta-llama/Meta-Llama-3-8B-Instruct-Turbo" , LLAMA_3_8B_CHAT_FP8 ),
458
434
_TogetherLlm ("meta-llama/Meta-Llama-3-8B-Instruct-Lite" , LLAMA_3_8B_CHAT_FP4 ),
@@ -462,11 +438,6 @@ def _text_models():
462
438
"accounts/fixie/models/8ab03ea85d2a4b9da659ce63db36a9b1" ,
463
439
LLAMA_3_8B_CHAT + "-lora-8ab0" ,
464
440
),
465
- _OctoLlm (
466
- "openpipe-llama-3-8b-32k" ,
467
- "openpipe-llama-3-8b-32k-lora-01j3" ,
468
- peft = "asset_01j318x0k2f7bv3nc5np6byn7s" ,
469
- ),
470
441
]
471
442
472
443
0 commit comments