Skip to content

Commit

Permalink
Improved O3 + Azure O3 support (#8181)
Browse files Browse the repository at this point in the history
* fix: support azure o3 model family for fake streaming workaround (#8162)

* fix: support azure o3 model family for fake streaming workaround

* refactor: rename helper to is_o_series_model for clarity

* update function calling parameters for o3 models (#8178)

* refactor(o1_transformation.py): refactor o1 config to be o series config, expand o series model check to o3

ensures max_tokens is correctly translated for o3

* feat(openai/): refactor o1 files to be 'o_series' files

expands naming to cover o3

* fix(azure/chat/o1_handler.py): azure openai is an instance of openai - was causing resets

* test(test_azure_o_series.py): assert stream faked for azure o3 mini

Resolves #8162

* fix(o1_transformation.py): fix o1 transformation logic to handle explicit o1_series routing

* docs(azure.md): update doc with `o_series/` model name

---------

Co-authored-by: byrongrogan <[email protected]>
Co-authored-by: Low Jian Sheng <[email protected]>
  • Loading branch information
3 people authored Feb 1, 2025
1 parent 91ed05d commit 23f458d
Show file tree
Hide file tree
Showing 14 changed files with 211 additions and 37 deletions.
61 changes: 60 additions & 1 deletion docs/my-website/docs/providers/azure.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import TabItem from '@theme/TabItem';
| Property | Details |
|-------|-------|
| Description | Azure OpenAI Service provides REST API access to OpenAI's powerful language models including o1, o1-mini, GPT-4o, GPT-4o mini, GPT-4 Turbo with Vision, GPT-4, GPT-3.5-Turbo, and Embeddings model series |
| Provider Route on LiteLLM | `azure/` |
| Provider Route on LiteLLM | `azure/`, [`azure/o_series/`](#azure-o-series-models) |
| Supported Operations | [`/chat/completions`](#azure-openai-chat-completion-models), [`/completions`](#azure-instruct-models), [`/embeddings`](../embedding/supported_embedding#azure-openai-embedding-models), [`/audio/speech`](#azure-text-to-speech-tts), [`/audio/transcriptions`](../audio_transcription), `/fine_tuning`, [`/batches`](#azure-batches-api), `/files`, [`/images`](../image_generation#azure-openai-image-generation-models) |
| Link to Provider Doc | [Azure OpenAI ↗](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview)

Expand Down Expand Up @@ -948,6 +948,65 @@ Expected Response:
{"data":[{"id":"batch_R3V...}
```

## O-Series Models

Azure OpenAI O-Series models are supported on LiteLLM.

LiteLLM routes any deployment name with `o1` or `o3` in the model name, to the O-Series [transformation](https://github.com/BerriAI/litellm/blob/91ed05df2962b8eee8492374b048d27cc144d08c/litellm/llms/azure/chat/o1_transformation.py#L4) logic.

To set this explicitly, set `model` to `azure/o_series/<your-deployment-name>`.

**Automatic Routing**

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import litellm
litellm.completion(model="azure/my-o3-deployment", messages=[{"role": "user", "content": "Hello, world!"}]) # 👈 Note: 'o3' in the deployment name
```
</TabItem>
<TabItem value="proxy" label="PROXY">

```yaml
model_list:
- model_name: o3-mini
litellm_params:
model: azure/o3-model
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
```

</TabItem>
</Tabs>

**Explicit Routing**

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import litellm
litellm.completion(model="azure/o_series/my-random-deployment-name", messages=[{"role": "user", "content": "Hello, world!"}]) # 👈 Note: 'o_series/' in the deployment name
```
</TabItem>
<TabItem value="proxy" label="PROXY">

```yaml
model_list:
- model_name: o3-mini
litellm_params:
model: azure/o_series/my-random-deployment-name
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
```
</TabItem>
</Tabs>



## Advanced
### Azure API Load-Balancing

Expand Down
7 changes: 4 additions & 3 deletions litellm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -886,11 +886,12 @@ def add_known_models():
from .llms.voyage.embedding.transformation import VoyageEmbeddingConfig
from .llms.azure_ai.chat.transformation import AzureAIStudioConfig
from .llms.mistral.mistral_chat_transformation import MistralConfig
from .llms.openai.chat.o1_transformation import (
OpenAIO1Config,
from .llms.openai.chat.o_series_transformation import (
OpenAIOSeriesConfig as OpenAIO1Config, # maintain backwards compatibility
OpenAIOSeriesConfig,
)

openAIO1Config = OpenAIO1Config()
openaiOSeriesConfig = OpenAIOSeriesConfig()
from .llms.openai.chat.gpt_transformation import (
OpenAIGPTConfig,
)
Expand Down
2 changes: 1 addition & 1 deletion litellm/litellm_core_utils/get_supported_openai_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def get_supported_openai_params( # noqa: PLR0915
elif custom_llm_provider == "openai":
return litellm.OpenAIConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "azure":
if litellm.AzureOpenAIO1Config().is_o1_model(model=model):
if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
return litellm.AzureOpenAIO1Config().get_supported_openai_params(
model=model
)
Expand Down
8 changes: 5 additions & 3 deletions litellm/llms/azure/chat/o1_handler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Handler file for calls to Azure OpenAI's o1 family of models
Handler file for calls to Azure OpenAI's o1/o3 family of models
Written separately to handle faking streaming for o1 models.
Written separately to handle faking streaming for o1 and o3 models.
"""

from typing import Optional, Union
Expand Down Expand Up @@ -36,7 +36,9 @@ def _get_openai_client(
]:

# Override to use Azure-specific client initialization
if isinstance(client, OpenAI) or isinstance(client, AsyncOpenAI):
if not isinstance(client, AzureOpenAI) and not isinstance(
client, AsyncAzureOpenAI
):
client = None

return get_azure_openai_client(
Expand Down
37 changes: 28 additions & 9 deletions litellm/llms/azure/chat/o1_transformation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Support for o1 model family
Support for o1 and o3 model families
https://platform.openai.com/docs/guides/reasoning
Expand All @@ -12,24 +12,26 @@
- Temperature => drop param (if user opts in to dropping param)
"""

from typing import Optional
from typing import List, Optional

from litellm import verbose_logger
from litellm.types.llms.openai import AllMessageValues
from litellm.utils import get_model_info

from ...openai.chat.o1_transformation import OpenAIO1Config
from ...openai.chat.o_series_transformation import OpenAIOSeriesConfig


class AzureOpenAIO1Config(OpenAIO1Config):
class AzureOpenAIO1Config(OpenAIOSeriesConfig):
def should_fake_stream(
self,
model: Optional[str],
stream: Optional[bool],
custom_llm_provider: Optional[str] = None,
) -> bool:
"""
Currently no Azure OpenAI models support native streaming.
Currently no Azure O Series models support native streaming.
"""

if stream is not True:
return False

Expand All @@ -38,14 +40,31 @@ def should_fake_stream(
model_info = get_model_info(
model=model, custom_llm_provider=custom_llm_provider
)
if model_info.get("supports_native_streaming") is True:

if (
model_info.get("supports_native_streaming") is True
): # allow user to override default with model_info={"supports_native_streaming": true}
return False
except Exception as e:
verbose_logger.debug(
f"Error getting model info in AzureOpenAIO1Config: {e}"
)

return True

def is_o1_model(self, model: str) -> bool:
return "o1" in model
def is_o_series_model(self, model: str) -> bool:
return "o1" in model or "o3" in model or "o_series/" in model

def transform_request(
self,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
headers: dict,
) -> dict:
model = model.replace(
"o_series/", ""
) # handle o_series/my-random-deployment-name
return super().transform_request(
model, messages, optional_params, litellm_params, headers
)
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from .gpt_transformation import OpenAIGPTConfig


class OpenAIO1Config(OpenAIGPTConfig):
class OpenAIOSeriesConfig(OpenAIGPTConfig):
"""
Reference: https://platform.openai.com/docs/guides/reasoning
"""
Expand Down Expand Up @@ -128,8 +128,10 @@ def map_openai_params(
non_default_params, optional_params, model, drop_params
)

def is_model_o1_reasoning_model(self, model: str) -> bool:
if model in litellm.open_ai_chat_completion_models and "o1" in model:
def is_model_o_series_model(self, model: str) -> bool:
if model in litellm.open_ai_chat_completion_models and (
"o1" in model or "o3" in model
):
return True
return False

Expand Down
11 changes: 7 additions & 4 deletions litellm/llms/openai/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,11 @@

from ...types.llms.openai import *
from ..base import BaseLLM
from .chat.o_series_transformation import OpenAIOSeriesConfig
from .common_utils import OpenAIError, drop_params_from_unprocessable_entity_error

openaiOSeriesConfig = OpenAIOSeriesConfig()


class MistralEmbeddingConfig:
"""
Expand Down Expand Up @@ -174,8 +177,8 @@ def get_supported_openai_params(self, model: str) -> list:
Returns:
list: List of supported openai parameters
"""
if litellm.openAIO1Config.is_model_o1_reasoning_model(model=model):
return litellm.openAIO1Config.get_supported_openai_params(model=model)
if openaiOSeriesConfig.is_model_o_series_model(model=model):
return openaiOSeriesConfig.get_supported_openai_params(model=model)
elif litellm.openAIGPTAudioConfig.is_model_gpt_audio_model(model=model):
return litellm.openAIGPTAudioConfig.get_supported_openai_params(model=model)
else:
Expand Down Expand Up @@ -203,8 +206,8 @@ def map_openai_params(
drop_params: bool,
) -> dict:
""" """
if litellm.openAIO1Config.is_model_o1_reasoning_model(model=model):
return litellm.openAIO1Config.map_openai_params(
if openaiOSeriesConfig.is_model_o_series_model(model=model):
return openaiOSeriesConfig.map_openai_params(
non_default_params=non_default_params,
optional_params=optional_params,
model=model,
Expand Down
3 changes: 2 additions & 1 deletion litellm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,8 @@ def completion( # type: ignore # noqa: PLR0915
if extra_headers is not None:
optional_params["extra_headers"] = extra_headers

if litellm.AzureOpenAIO1Config().is_o1_model(model=model):
if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):

## LOAD CONFIG - if set
config = litellm.AzureOpenAIO1Config.get_config()
for k, v in config.items():
Expand Down
15 changes: 11 additions & 4 deletions litellm/model_prices_and_context_window_backup.json
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,11 @@
"cache_read_input_token_cost": 0.00000055,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": false,
"supports_vision": false,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_response_schema": true
},
"o3-mini-2025-01-31": {
"max_tokens": 100000,
Expand All @@ -223,8 +226,11 @@
"cache_read_input_token_cost": 0.00000055,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": false,
"supports_vision": false,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_response_schema": true
},
"o1-mini-2024-09-12": {
"max_tokens": 65536,
Expand Down Expand Up @@ -978,8 +984,9 @@
"cache_read_input_token_cost": 0.00000055,
"litellm_provider": "azure",
"mode": "chat",
"supports_vision": true,
"supports_prompt_caching": true
"supports_vision": false,
"supports_prompt_caching": true,
"supports_response_schema": true
},
"azure/o1-mini": {
"max_tokens": 65536,
Expand Down
8 changes: 4 additions & 4 deletions litellm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3485,7 +3485,7 @@ def _check_valid_arg(supported_params: List[str]):
),
)
elif custom_llm_provider == "azure":
if litellm.AzureOpenAIO1Config().is_o1_model(model=model):
if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
optional_params = litellm.AzureOpenAIO1Config().map_openai_params(
non_default_params=non_default_params,
optional_params=optional_params,
Expand Down Expand Up @@ -5918,9 +5918,9 @@ def get_provider_chat_config( # noqa: PLR0915
"""
if (
provider == LlmProviders.OPENAI
and litellm.openAIO1Config.is_model_o1_reasoning_model(model=model)
and litellm.openaiOSeriesConfig.is_model_o_series_model(model=model)
):
return litellm.OpenAIO1Config()
return litellm.openaiOSeriesConfig
elif litellm.LlmProviders.DEEPSEEK == provider:
return litellm.DeepSeekChatConfig()
elif litellm.LlmProviders.GROQ == provider:
Expand Down Expand Up @@ -5993,7 +5993,7 @@ def get_provider_chat_config( # noqa: PLR0915
):
return litellm.AI21ChatConfig()
elif litellm.LlmProviders.AZURE == provider:
if litellm.AzureOpenAIO1Config().is_o1_model(model=model):
if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
return litellm.AzureOpenAIO1Config()
return litellm.AzureOpenAIConfig()
elif litellm.LlmProviders.AZURE_AI == provider:
Expand Down
15 changes: 11 additions & 4 deletions model_prices_and_context_window.json
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,11 @@
"cache_read_input_token_cost": 0.00000055,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": false,
"supports_vision": false,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_response_schema": true
},
"o3-mini-2025-01-31": {
"max_tokens": 100000,
Expand All @@ -223,8 +226,11 @@
"cache_read_input_token_cost": 0.00000055,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": false,
"supports_vision": false,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_response_schema": true
},
"o1-mini-2024-09-12": {
"max_tokens": 65536,
Expand Down Expand Up @@ -978,8 +984,9 @@
"cache_read_input_token_cost": 0.00000055,
"litellm_provider": "azure",
"mode": "chat",
"supports_vision": true,
"supports_prompt_caching": true
"supports_vision": false,
"supports_prompt_caching": true,
"supports_response_schema": true
},
"azure/o1-mini": {
"max_tokens": 65536,
Expand Down
Loading

0 comments on commit 23f458d

Please sign in to comment.