From 23f458d2daf48be4947280f5e6643a1160f0e1e7 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Sat, 1 Feb 2025 09:52:28 -0800 Subject: [PATCH] Improved O3 + Azure O3 support (#8181) * fix: support azure o3 model family for fake streaming workaround (#8162) * fix: support azure o3 model family for fake streaming workaround * refactor: rename helper to is_o_series_model for clarity * update function calling parameters for o3 models (#8178) * refactor(o1_transformation.py): refactor o1 config to be o series config, expand o series model check to o3 ensures max_tokens is correctly translated for o3 * feat(openai/): refactor o1 files to be 'o_series' files expands naming to cover o3 * fix(azure/chat/o1_handler.py): azure openai is an instance of openai - was causing resets * test(test_azure_o_series.py): assert stream faked for azure o3 mini Resolves https://github.com/BerriAI/litellm/pull/8162 * fix(o1_transformation.py): fix o1 transformation logic to handle explicit o1_series routing * docs(azure.md): update doc with `o_series/` model name --------- Co-authored-by: byrongrogan <47910641+byrongrogan@users.noreply.github.com> Co-authored-by: Low Jian Sheng <15527690+lowjiansheng@users.noreply.github.com> --- docs/my-website/docs/providers/azure.md | 61 +++++++++++++++++- litellm/__init__.py | 7 ++- .../get_supported_openai_params.py | 2 +- litellm/llms/azure/chat/o1_handler.py | 8 ++- litellm/llms/azure/chat/o1_transformation.py | 37 ++++++++--- .../{o1_handler.py => o_series_handler.py} | 0 ...ormation.py => o_series_transformation.py} | 8 ++- litellm/llms/openai/openai.py | 11 ++-- litellm/main.py | 3 +- ...odel_prices_and_context_window_backup.json | 15 +++-- litellm/utils.py | 8 +-- model_prices_and_context_window.json | 15 +++-- ...est_azure_o1.py => test_azure_o_series.py} | 62 +++++++++++++++++++ tests/llm_translation/test_openai_o1.py | 11 ++++ 14 files changed, 211 insertions(+), 37 deletions(-) rename litellm/llms/openai/chat/{o1_handler.py => o_series_handler.py} (100%) rename litellm/llms/openai/chat/{o1_transformation.py => o_series_transformation.py} (95%) rename tests/llm_translation/{test_azure_o1.py => test_azure_o_series.py} (51%) diff --git a/docs/my-website/docs/providers/azure.md b/docs/my-website/docs/providers/azure.md index 05ea02302d83..111738a44959 100644 --- a/docs/my-website/docs/providers/azure.md +++ b/docs/my-website/docs/providers/azure.md @@ -10,7 +10,7 @@ import TabItem from '@theme/TabItem'; | Property | Details | |-------|-------| | Description | Azure OpenAI Service provides REST API access to OpenAI's powerful language models including o1, o1-mini, GPT-4o, GPT-4o mini, GPT-4 Turbo with Vision, GPT-4, GPT-3.5-Turbo, and Embeddings model series | -| Provider Route on LiteLLM | `azure/` | +| Provider Route on LiteLLM | `azure/`, [`azure/o_series/`](#azure-o-series-models) | | Supported Operations | [`/chat/completions`](#azure-openai-chat-completion-models), [`/completions`](#azure-instruct-models), [`/embeddings`](../embedding/supported_embedding#azure-openai-embedding-models), [`/audio/speech`](#azure-text-to-speech-tts), [`/audio/transcriptions`](../audio_transcription), `/fine_tuning`, [`/batches`](#azure-batches-api), `/files`, [`/images`](../image_generation#azure-openai-image-generation-models) | | Link to Provider Doc | [Azure OpenAI ↗](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview) @@ -948,6 +948,65 @@ Expected Response: {"data":[{"id":"batch_R3V...} ``` +## O-Series Models + +Azure OpenAI O-Series models are 
supported on LiteLLM.
+
+LiteLLM routes any deployment whose model name contains `o1` or `o3` to the O-Series [transformation](https://github.com/BerriAI/litellm/blob/91ed05df2962b8eee8492374b048d27cc144d08c/litellm/llms/azure/chat/o1_transformation.py#L4) logic.
+
+To opt in to this routing explicitly, prefix the deployment name with `azure/o_series/`.
+
+**Automatic Routing**
+
+```python
+import litellm
+
+litellm.completion(model="azure/my-o3-deployment", messages=[{"role": "user", "content": "Hello, world!"}]) # 👈 Note: 'o3' in the deployment name
+```
+
+```yaml
+model_list:
+  - model_name: o3-mini
+    litellm_params:
+      model: azure/o3-model
+      api_base: os.environ/AZURE_API_BASE
+      api_key: os.environ/AZURE_API_KEY
+```
+
+**Explicit Routing**
+
+```python
+import litellm
+
+litellm.completion(model="azure/o_series/my-random-deployment-name", messages=[{"role": "user", "content": "Hello, world!"}]) # 👈 Note: 'o_series/' prefix in the model name
+```
+
+```yaml
+model_list:
+  - model_name: o3-mini
+    litellm_params:
+      model: azure/o_series/my-random-deployment-name
+      api_base: os.environ/AZURE_API_BASE
+      api_key: os.environ/AZURE_API_KEY
+```
+
 ## Advanced
 
 ### Azure API Load-Balancing
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 97c497fede8a..506ecb258e01 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -886,11 +886,12 @@ def add_known_models():
 from .llms.voyage.embedding.transformation import VoyageEmbeddingConfig
 from .llms.azure_ai.chat.transformation import AzureAIStudioConfig
 from .llms.mistral.mistral_chat_transformation import MistralConfig
-from .llms.openai.chat.o1_transformation import (
-    OpenAIO1Config,
+from .llms.openai.chat.o_series_transformation import (
+    OpenAIOSeriesConfig as OpenAIO1Config,  # maintain backwards compatibility
+    OpenAIOSeriesConfig,
 )
 
-openAIO1Config = OpenAIO1Config()
+openaiOSeriesConfig = OpenAIOSeriesConfig()
 from .llms.openai.chat.gpt_transformation import (
     OpenAIGPTConfig,
 )
diff --git a/litellm/litellm_core_utils/get_supported_openai_params.py b/litellm/litellm_core_utils/get_supported_openai_params.py
index e251784f4e18..9358518930c9 100644
--- a/litellm/litellm_core_utils/get_supported_openai_params.py
+++ b/litellm/litellm_core_utils/get_supported_openai_params.py
@@ -81,7 +81,7 @@ def get_supported_openai_params(  # noqa: PLR0915
     elif custom_llm_provider == "openai":
         return litellm.OpenAIConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "azure":
-        if litellm.AzureOpenAIO1Config().is_o1_model(model=model):
+        if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
             return litellm.AzureOpenAIO1Config().get_supported_openai_params(
                 model=model
             )
diff --git a/litellm/llms/azure/chat/o1_handler.py b/litellm/llms/azure/chat/o1_handler.py
index 1cb6f888c3f7..a2042b3e2adb 100644
--- a/litellm/llms/azure/chat/o1_handler.py
+++ b/litellm/llms/azure/chat/o1_handler.py
@@ -1,7 +1,7 @@
 """
-Handler file for calls to Azure OpenAI's o1 family of models
+Handler file for calls to Azure OpenAI's o1/o3 family of models
 
-Written separately to handle faking streaming for o1 models.
+Written separately to handle faking streaming for o1 and o3 models.
""" from typing import Optional, Union @@ -36,7 +36,9 @@ def _get_openai_client( ]: # Override to use Azure-specific client initialization - if isinstance(client, OpenAI) or isinstance(client, AsyncOpenAI): + if not isinstance(client, AzureOpenAI) and not isinstance( + client, AsyncAzureOpenAI + ): client = None return get_azure_openai_client( diff --git a/litellm/llms/azure/chat/o1_transformation.py b/litellm/llms/azure/chat/o1_transformation.py index 0b56aa1fb4d8..2cae4c7cbb1f 100644 --- a/litellm/llms/azure/chat/o1_transformation.py +++ b/litellm/llms/azure/chat/o1_transformation.py @@ -1,5 +1,5 @@ """ -Support for o1 model family +Support for o1 and o3 model families https://platform.openai.com/docs/guides/reasoning @@ -12,15 +12,16 @@ - Temperature => drop param (if user opts in to dropping param) """ -from typing import Optional +from typing import List, Optional from litellm import verbose_logger +from litellm.types.llms.openai import AllMessageValues from litellm.utils import get_model_info -from ...openai.chat.o1_transformation import OpenAIO1Config +from ...openai.chat.o_series_transformation import OpenAIOSeriesConfig -class AzureOpenAIO1Config(OpenAIO1Config): +class AzureOpenAIO1Config(OpenAIOSeriesConfig): def should_fake_stream( self, model: Optional[str], @@ -28,8 +29,9 @@ def should_fake_stream( custom_llm_provider: Optional[str] = None, ) -> bool: """ - Currently no Azure OpenAI models support native streaming. + Currently no Azure O Series models support native streaming. """ + if stream is not True: return False @@ -38,14 +40,31 @@ def should_fake_stream( model_info = get_model_info( model=model, custom_llm_provider=custom_llm_provider ) - if model_info.get("supports_native_streaming") is True: + + if ( + model_info.get("supports_native_streaming") is True + ): # allow user to override default with model_info={"supports_native_streaming": true} return False except Exception as e: verbose_logger.debug( f"Error getting model info in AzureOpenAIO1Config: {e}" ) - return True - def is_o1_model(self, model: str) -> bool: - return "o1" in model + def is_o_series_model(self, model: str) -> bool: + return "o1" in model or "o3" in model or "o_series/" in model + + def transform_request( + self, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + headers: dict, + ) -> dict: + model = model.replace( + "o_series/", "" + ) # handle o_series/my-random-deployment-name + return super().transform_request( + model, messages, optional_params, litellm_params, headers + ) diff --git a/litellm/llms/openai/chat/o1_handler.py b/litellm/llms/openai/chat/o_series_handler.py similarity index 100% rename from litellm/llms/openai/chat/o1_handler.py rename to litellm/llms/openai/chat/o_series_handler.py diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o_series_transformation.py similarity index 95% rename from litellm/llms/openai/chat/o1_transformation.py rename to litellm/llms/openai/chat/o_series_transformation.py index f19472982bb2..b90a9ff578a2 100644 --- a/litellm/llms/openai/chat/o1_transformation.py +++ b/litellm/llms/openai/chat/o_series_transformation.py @@ -26,7 +26,7 @@ from .gpt_transformation import OpenAIGPTConfig -class OpenAIO1Config(OpenAIGPTConfig): +class OpenAIOSeriesConfig(OpenAIGPTConfig): """ Reference: https://platform.openai.com/docs/guides/reasoning """ @@ -128,8 +128,10 @@ def map_openai_params( non_default_params, optional_params, model, drop_params ) - def 
is_model_o1_reasoning_model(self, model: str) -> bool: - if model in litellm.open_ai_chat_completion_models and "o1" in model: + def is_model_o_series_model(self, model: str) -> bool: + if model in litellm.open_ai_chat_completion_models and ( + "o1" in model or "o3" in model + ): return True return False diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index eb095661a833..82b9c9ba3845 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -47,8 +47,11 @@ from ...types.llms.openai import * from ..base import BaseLLM +from .chat.o_series_transformation import OpenAIOSeriesConfig from .common_utils import OpenAIError, drop_params_from_unprocessable_entity_error +openaiOSeriesConfig = OpenAIOSeriesConfig() + class MistralEmbeddingConfig: """ @@ -174,8 +177,8 @@ def get_supported_openai_params(self, model: str) -> list: Returns: list: List of supported openai parameters """ - if litellm.openAIO1Config.is_model_o1_reasoning_model(model=model): - return litellm.openAIO1Config.get_supported_openai_params(model=model) + if openaiOSeriesConfig.is_model_o_series_model(model=model): + return openaiOSeriesConfig.get_supported_openai_params(model=model) elif litellm.openAIGPTAudioConfig.is_model_gpt_audio_model(model=model): return litellm.openAIGPTAudioConfig.get_supported_openai_params(model=model) else: @@ -203,8 +206,8 @@ def map_openai_params( drop_params: bool, ) -> dict: """ """ - if litellm.openAIO1Config.is_model_o1_reasoning_model(model=model): - return litellm.openAIO1Config.map_openai_params( + if openaiOSeriesConfig.is_model_o_series_model(model=model): + return openaiOSeriesConfig.map_openai_params( non_default_params=non_default_params, optional_params=optional_params, model=model, diff --git a/litellm/main.py b/litellm/main.py index 503aeccee24b..35ff3f7b9504 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -1201,7 +1201,8 @@ def completion( # type: ignore # noqa: PLR0915 if extra_headers is not None: optional_params["extra_headers"] = extra_headers - if litellm.AzureOpenAIO1Config().is_o1_model(model=model): + if litellm.AzureOpenAIO1Config().is_o_series_model(model=model): + ## LOAD CONFIG - if set config = litellm.AzureOpenAIO1Config.get_config() for k, v in config.items(): diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 477edc331183..eafb4cb3c5a0 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -211,8 +211,11 @@ "cache_read_input_token_cost": 0.00000055, "litellm_provider": "openai", "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": false, "supports_vision": false, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_response_schema": true }, "o3-mini-2025-01-31": { "max_tokens": 100000, @@ -223,8 +226,11 @@ "cache_read_input_token_cost": 0.00000055, "litellm_provider": "openai", "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": false, "supports_vision": false, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_response_schema": true }, "o1-mini-2024-09-12": { "max_tokens": 65536, @@ -978,8 +984,9 @@ "cache_read_input_token_cost": 0.00000055, "litellm_provider": "azure", "mode": "chat", - "supports_vision": true, - "supports_prompt_caching": true + "supports_vision": false, + "supports_prompt_caching": true, + 
"supports_response_schema": true }, "azure/o1-mini": { "max_tokens": 65536, diff --git a/litellm/utils.py b/litellm/utils.py index 9c4beaea9028..7197862e3a9a 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -3485,7 +3485,7 @@ def _check_valid_arg(supported_params: List[str]): ), ) elif custom_llm_provider == "azure": - if litellm.AzureOpenAIO1Config().is_o1_model(model=model): + if litellm.AzureOpenAIO1Config().is_o_series_model(model=model): optional_params = litellm.AzureOpenAIO1Config().map_openai_params( non_default_params=non_default_params, optional_params=optional_params, @@ -5918,9 +5918,9 @@ def get_provider_chat_config( # noqa: PLR0915 """ if ( provider == LlmProviders.OPENAI - and litellm.openAIO1Config.is_model_o1_reasoning_model(model=model) + and litellm.openaiOSeriesConfig.is_model_o_series_model(model=model) ): - return litellm.OpenAIO1Config() + return litellm.openaiOSeriesConfig elif litellm.LlmProviders.DEEPSEEK == provider: return litellm.DeepSeekChatConfig() elif litellm.LlmProviders.GROQ == provider: @@ -5993,7 +5993,7 @@ def get_provider_chat_config( # noqa: PLR0915 ): return litellm.AI21ChatConfig() elif litellm.LlmProviders.AZURE == provider: - if litellm.AzureOpenAIO1Config().is_o1_model(model=model): + if litellm.AzureOpenAIO1Config().is_o_series_model(model=model): return litellm.AzureOpenAIO1Config() return litellm.AzureOpenAIConfig() elif litellm.LlmProviders.AZURE_AI == provider: diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 477edc331183..eafb4cb3c5a0 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -211,8 +211,11 @@ "cache_read_input_token_cost": 0.00000055, "litellm_provider": "openai", "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": false, "supports_vision": false, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_response_schema": true }, "o3-mini-2025-01-31": { "max_tokens": 100000, @@ -223,8 +226,11 @@ "cache_read_input_token_cost": 0.00000055, "litellm_provider": "openai", "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": false, "supports_vision": false, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_response_schema": true }, "o1-mini-2024-09-12": { "max_tokens": 65536, @@ -978,8 +984,9 @@ "cache_read_input_token_cost": 0.00000055, "litellm_provider": "azure", "mode": "chat", - "supports_vision": true, - "supports_prompt_caching": true + "supports_vision": false, + "supports_prompt_caching": true, + "supports_response_schema": true }, "azure/o1-mini": { "max_tokens": 65536, diff --git a/tests/llm_translation/test_azure_o1.py b/tests/llm_translation/test_azure_o_series.py similarity index 51% rename from tests/llm_translation/test_azure_o1.py rename to tests/llm_translation/test_azure_o_series.py index d16b11696d48..b1187f42aef6 100644 --- a/tests/llm_translation/test_azure_o1.py +++ b/tests/llm_translation/test_azure_o_series.py @@ -63,3 +63,65 @@ def test_override_fake_stream(self): model="azure/o1-preview", stream=True ) assert fake_stream is False + + +def test_azure_o3_streaming(): + """ + Test that o3 models handles fake streaming correctly. 
+    """
+    from openai import AzureOpenAI
+    from litellm import completion
+
+    client = AzureOpenAI(
+        api_key="my-fake-o1-key",
+        base_url="https://openai-gpt-4-test-v-1.openai.azure.com",
+        api_version="2024-02-15-preview",
+    )
+
+    with patch.object(
+        client.chat.completions.with_raw_response, "create"
+    ) as mock_create:
+        try:
+            completion(
+                model="azure/o3-mini",
+                messages=[{"role": "user", "content": "Hello, world!"}],
+                stream=True,
+                client=client,
+            )
+        except (
+            Exception
+        ) as e:  # expect output translation error, as the mock response doesn't return valid JSON
+            print(e)
+        assert mock_create.call_count == 1
+        assert "stream" not in mock_create.call_args.kwargs
+
+
+def test_azure_o_series_routing():
+    """
+    Allows the user to pass model="azure/o_series/" for explicit o_series model routing.
+    """
+    from openai import AzureOpenAI
+    from litellm import completion
+
+    client = AzureOpenAI(
+        api_key="my-fake-o1-key",
+        base_url="https://openai-gpt-4-test-v-1.openai.azure.com",
+        api_version="2024-02-15-preview",
+    )
+
+    with patch.object(
+        client.chat.completions.with_raw_response, "create"
+    ) as mock_create:
+        try:
+            completion(
+                model="azure/o_series/my-random-deployment-name",
+                messages=[{"role": "user", "content": "Hello, world!"}],
+                stream=True,
+                client=client,
+            )
+        except (
+            Exception
+        ) as e:  # expect output translation error, as the mock response doesn't return valid JSON
+            print(e)
+        assert mock_create.call_count == 1
+        assert "stream" not in mock_create.call_args.kwargs
diff --git a/tests/llm_translation/test_openai_o1.py b/tests/llm_translation/test_openai_o1.py
index 9523038eaa70..d7423f7c5022 100644
--- a/tests/llm_translation/test_openai_o1.py
+++ b/tests/llm_translation/test_openai_o1.py
@@ -167,6 +167,17 @@ def test_prompt_caching(self):
         pass
 
 
+class TestOpenAIO3(BaseLLMChatTest):
+    def get_base_completion_call_args(self):
+        return {
+            "model": "o3-mini",
+        }
+
+    def test_tool_call_no_arguments(self, tool_call_no_arguments):
+        """Test that tool calls with no arguments are translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
+        pass
+
+
 def test_o1_supports_vision():
     """Test that o1 supports vision"""
     os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
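
A note on the `should_fake_stream` change above: per the inline comment in `AzureOpenAIO1Config.should_fake_stream`, a deployment can opt back into native streaming via `model_info={"supports_native_streaming": true}`. A minimal proxy `config.yaml` sketch of that override (assuming the standard per-deployment `model_info` block; the deployment name here is illustrative, not from this patch):

```yaml
model_list:
  - model_name: o3-mini
    litellm_params:
      model: azure/o3-mini                # 'o3' in the name triggers O-Series routing
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
    model_info:
      supports_native_streaming: true     # assumption: opts this deployment out of the fake-streaming workaround
```

With this flag set, `get_model_info` reports `supports_native_streaming: true`, so `should_fake_stream` returns `False` and `stream=True` requests pass through to Azure unchanged.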