fixie-ai · anurlybayev · Dec 4, 2024 · Dec 4, 2024 · Jan 5, 2025
diff --git a/README.md b/README.md
@@ -10,6 +10,7 @@ Large Language Models (LLMs):
 - Llama2 and 3 from several different providers, including
   - Anyscale
   - Azure
+  - CentML
   - Cerebras
   - Cloudflare
   - Groq

diff --git a/llm_benchmark_suite.py b/llm_benchmark_suite.py
@@ -27,6 +27,7 @@
 GPT_35_TURBO_1106 = "gpt-3.5-turbo-1106"
 GEMINI_1_5_PRO = "gemini-1.5-pro"
 GEMINI_1_5_FLASH = "gemini-1.5-flash"
+LLAMA_33_70B_INSTRUCT = "Llama-3.3-70B-Instruct"
 LLAMA_31_405B_CHAT = "llama-3.1-405b-chat"
 LLAMA_31_405B_CHAT_FP8 = "llama-3.1-405b-chat-fp8"
 LLAMA_31_70B_CHAT = "llama-3.1-70b-chat"
@@ -125,6 +126,18 @@ async def run(self, pass_argv: List[str], spread: float) -> asyncio.Task:
         return await llm_benchmark.run(full_argv)
 
 
+class _CentmlLlm(_Llm):
+    """CentML provider; see https://docs.centml.ai/resources/pricing"""
+
+    def __init__(self, model: str, display_model: Optional[str] = None):
+        # Display name: fall back to the raw model id when none is given.
+        label = "centml.ai/" + (display_model or model)
+        # os.getenv yields None when CENTML_API_KEY is not set.
+        key = os.getenv("CENTML_API_KEY")
+        endpoint = "https://api.centml.com/openai/v1"
+        super().__init__(model, label, api_key=key, base_url=endpoint)
+
+
 class _CerebrasLlm(_Llm):
     """See https://docs.cerebras.ai/en/latest/wsc/Model-zoo/MZ-overview.html#list-of-models"""
 
@@ -368,6 +381,8 @@ def _text_models():
             "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", LLAMA_31_405B_CHAT_FP8
         ),
         # _OvhLlm("llama-3-1-405b-instruct", LLAMA_31_405B_CHAT),
+        # Llama 3.3 70b
+        _CentmlLlm("meta-llama/Llama-3.3-70B-Instruct", LLAMA_33_70B_INSTRUCT),
         # Llama 3.1 70b
         _CerebrasLlm("llama3.1-70b", LLAMA_31_70B_CHAT),
         _CloudflareLlm("@cf/meta/llama-3.1-70b-preview", LLAMA_31_70B_CHAT),
@@ -459,6 +474,7 @@ def _tools_models():
         # _FireworksLlm(
         #    "accounts/fireworks/models/llama-v3p1-405b-instruct", LLAMA_31_405B_CHAT_FP8
         # ), returns "FUNCTION" and the call as text
+        _CentmlLlm("meta-llama/Llama-3.3-70B-Instruct", LLAMA_33_70B_INSTRUCT),
         _GroqLlm("llama-3.1-405b-reasoning", LLAMA_31_405B_CHAT_FP8),
         _GroqLlm("llama-3.1-70b-versatile", LLAMA_31_70B_CHAT_FP8),
         _GroqLlm("llama-3.1-8b-instant", LLAMA_31_8B_CHAT_FP8),