
Commit

Update azure_api
samholt committed Apr 22, 2024
1 parent 11e4fb0 commit 5154c35
Showing 3 changed files with 103 additions and 104 deletions.
11 changes: 9 additions & 2 deletions l2mac/config.py
@@ -1,5 +1,6 @@
 from enum import Enum
 from pathlib import Path
+from typing import Optional
 
 import yaml
 from pydantic import BaseModel, ValidationError
@@ -14,11 +15,17 @@ class OpenAIRateLimitTier(str, Enum):
     tier5 = "tier5"
 
 
+class ApiType(str, Enum):
+    openai = "openai"
+    azure = "azure"
+
+
 class LLMCoreConfig(BaseModel):
-    api_type: str = "openai"
+    api_type: ApiType = ApiType.openai
     model: str = "gpt-4-1106-preview"
-    base_url: str = "https://api.openai.com/v1"
+    base_url: Optional[str] = "https://api.openai.com/v1"
     api_key: str
+    api_version: Optional[str] = None
 
 
 class LLMSettingsConfig(BaseModel):
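
The new `ApiType` enum plus the optional `api_version` field make the Azure setup explicit in the config schema. As a minimal sketch, the updated model could be constructed directly like this (the endpoint, key, and API version below are placeholder values, not repository defaults):

```python
from l2mac.config import ApiType, LLMCoreConfig

# Azure-flavored config: base_url doubles as the azure_endpoint downstream.
azure_cfg = LLMCoreConfig(
    api_type=ApiType.azure,
    model="gpt-4-1106-preview",
    base_url="https://my-resource.openai.azure.com/",  # placeholder endpoint
    api_key="<azure-api-key>",                         # placeholder key
    api_version="2024-02-01",                          # placeholder version
)

# OpenAI config: the new defaults apply (ApiType.openai, api_version=None).
openai_cfg = LLMCoreConfig(api_key="<openai-api-key>")
```
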
172 changes: 82 additions & 90 deletions l2mac/llm_providers/general.py
@@ -6,8 +6,13 @@
 from time import perf_counter, sleep
 
 import openai
-from openai import AsyncOpenAI, OpenAI
+from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
 
+from l2mac.config import ApiType
+from l2mac.llm_providers.openai import (
+    openai_models,
+    openai_rate_limits_per_tier_per_model,
+)
 from l2mac.llm_providers.rate_limiter import ChatRateLimiter
 from l2mac.llm_providers.utils import find_best_match

@@ -22,16 +27,18 @@ def remove_nulls(d):
 
 
 def setup_chat_rate_limiter_internal(config: dict):
-    if config.llm.api_type == "azure":
-        model = config.run.model
-        model_details = AZURE_MODEL_DETAILS_MAP[model]
-        request_limit = model_details["properties"]["rate_limit_(Requests per minute)"]
-        token_limit = model_details["properties"]["rate_limit_(Tokens per minute)"]
-    elif config.llm.api_type == "openai":
-        from l2mac.llm_providers.openai import rate_limits_per_tier_per_model
-
+    if config.llm.api_type == ApiType.azure:
+        model = config.llm.model
+        model_details = find_best_match(
+            openai_rate_limits_per_tier_per_model[config.llm_settings.rate_limit_tier], model
+        )
+        request_limit = model_details["RPM"]
+        token_limit = model_details["TPM"]
+    elif config.llm.api_type == ApiType.openai:
         model = config.llm.model
-        model_details = find_best_match(rate_limits_per_tier_per_model[config.llm_settings.rate_limit_tier], model)
+        model_details = find_best_match(
+            openai_rate_limits_per_tier_per_model[config.llm_settings.rate_limit_tier], model
+        )
         request_limit = model_details["RPM"]
         token_limit = model_details["TPM"]
     else:
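
Both branches now resolve limits from the same shared OpenAI tier table via `find_best_match`. As a rough illustration of that lookup, here is a hypothetical stand-in (not the repository's `find_best_match` implementation), assuming longest-prefix matching on the model name:

```python
from l2mac.llm_providers.openai import openai_rate_limits_per_tier_per_model

def longest_prefix_match(table: dict, model: str) -> dict:
    # Hypothetical helper: pick the entry whose key is the longest prefix
    # of the requested model name.
    key = max((k for k in table if model.startswith(k)), key=len)
    return table[key]

tier2 = openai_rate_limits_per_tier_per_model["tier2"]
details = longest_prefix_match(tier2, "gpt-4-1106-preview")  # matches the "gpt-4" row
print(details["RPM"], details["TPM"])  # 5000 40000
```
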
@@ -42,13 +49,11 @@ def setup_chat_rate_limiter_internal(config: dict):
 
 
 def get_model_max_tokens(config: dict):
-    if config.llm.api_type == "azure":
-        model = config.run.model
-        model_details = AZURE_MODEL_DETAILS_MAP[model]
-        max_tokens = model_details["properties"]["max_tokens"]
-    elif config.llm.api_type == "openai":
-        from l2mac.llm_providers.openai import openai_models
-
+    if config.llm.api_type == ApiType.azure:
+        model = config.llm.model
+        model_details = find_best_match(openai_models, model)
+        max_tokens = model_details["context_window"]
+    elif config.llm.api_type == ApiType.openai:
         model = config.llm.model
         model_details = find_best_match(openai_models, model)
         max_tokens = model_details["context_window"]
@@ -60,26 +65,28 @@ def get_model_max_tokens(config: dict):
 
 
 def get_llm_config(config, logger, name, rate_limiter):
-    return deepcopy(
-        {
-            "model": config.llm.model,
-            "temperature": config.llm_settings.temperature,
-            "top_p": config.llm_settings.top_p,
-            "frequency_penalty": config.llm_settings.frequency_penalty,
-            "presence_penalty": config.llm_settings.presence_penalty,
-            "stop": config.llm_settings.stop,
-            "stream": config.llm_settings.api_stream,
-            "api_key": config.llm.api_key,
-            "_open_ai_rate_limit_requests_per_minute": config.llm_settings.rate_limit_requests_per_minute,
-            "_logger": logger,
-            "_name": name,
-            "_rate_limiter": rate_limiter,
-            "_retry_with_exponential_backoff__initial_delay": config.llm_settings.api_retry_with_exponential_backoff__initial_delay,
-            "_retry_with_exponential_backoff__exponential_base": config.llm_settings.api_retry_with_exponential_backoff__exponential_base,
-            "_retry_with_exponential_backoff__jitter": config.llm_settings.api_retry_with_exponential_backoff__jitter,
-            "_retry_with_exponential_backoff__max_retries": config.llm_settings.api_retry_with_exponential_backoff__max_retries,
-        }
-    )
+    llm_config_dict = {
+        "model": config.llm.model,
+        "api_type": config.llm.api_type,
+        "temperature": config.llm_settings.temperature,
+        "top_p": config.llm_settings.top_p,
+        "frequency_penalty": config.llm_settings.frequency_penalty,
+        "presence_penalty": config.llm_settings.presence_penalty,
+        "stop": config.llm_settings.stop,
+        "stream": config.llm_settings.api_stream,
+        "api_key": config.llm.api_key,
+        "_open_ai_rate_limit_requests_per_minute": config.llm_settings.rate_limit_requests_per_minute,
+        "_logger": logger,
+        "_name": name,
+        "_rate_limiter": rate_limiter,
+        "_retry_with_exponential_backoff__initial_delay": config.llm_settings.api_retry_with_exponential_backoff__initial_delay,
+        "_retry_with_exponential_backoff__exponential_base": config.llm_settings.api_retry_with_exponential_backoff__exponential_base,
+        "_retry_with_exponential_backoff__jitter": config.llm_settings.api_retry_with_exponential_backoff__jitter,
+        "_retry_with_exponential_backoff__max_retries": config.llm_settings.api_retry_with_exponential_backoff__max_retries,
+    }
+    if config.llm.api_type == ApiType.azure:
+        llm_config_dict.update({"azure_endpoint": config.llm.base_url, "api_version": config.llm.api_version})
+    return deepcopy(llm_config_dict)
 
 
 def chat_completion_rl(**kwargs):
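
For an Azure config, the mapping returned by `get_llm_config` now carries `api_type` plus the two extra connection keys. An illustrative shape, with placeholder values and the `_`-prefixed bookkeeping keys elided:

```python
from l2mac.config import ApiType

llm_config = {
    "model": "gpt-4-1106-preview",
    "api_type": ApiType.azure,
    "api_key": "<azure-api-key>",
    "stream": False,
    # merged in by the ApiType.azure branch, from base_url / api_version:
    "azure_endpoint": "https://my-resource.openai.azure.com/",
    "api_version": "2024-02-01",
}
```
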
@@ -172,31 +179,25 @@ async def async_chat_completion_rl_inner(**kwargs):
     kwargs.get("_logger", None)
     kwargs.get("_name", None)
     rate_limiter = kwargs.get("_rate_limiter", None)
-    model = kwargs.get("model", "gpt-3.5-turbo")
-    if kwargs.get("config", None) and kwargs.get("config", None).llm.api_type == "azure":
-        model_details = AZURE_MODEL_DETAILS_MAP[model]
-        kwargs.pop("model", None)
-        kwargs["engine"] = model_details["engine"]
-        kwargs["api_key"] = model_details["api_key"]
-        kwargs["api_version"] = model_details["api_version"]
-        kwargs["api_base"] = model_details["api_base"]
-        kwargs["api_type"] = model_details["api_type"]
-        kwargs.pop("_open_ai_rate_limit_requests_per_minute", None)
-        kwargs["_open_ai_rate_limit_requests_per_minute"] = model_details["properties"][
-            "rate_limit_(Requests per minute)"
-        ]
-
-    # requests_per_minute = kwargs.get('_open_ai_rate_limit_requests_per_minute', 3000)
-    # delay_in_seconds = 60.0 / requests_per_minute
-    # time.sleep(delay_in_seconds)
-
-    aclient = AsyncOpenAI(api_key=kwargs["api_key"])
-    kwargs.pop("_open_ai_rate_limit_requests_per_minute", None)
-    kwargs.pop("_logger", None)
-    kwargs.pop("api_key", None)
-    kwargs.pop("_name", None)
-    kwargs.pop("_rate_limiter", None)
-    kwargs.pop("_rate_limiter", None)
+    api_type = kwargs.get("api_type", ApiType.openai)
+    if api_type == ApiType.openai:
+        aclient = AsyncOpenAI(api_key=kwargs["api_key"])
+    elif api_type == ApiType.azure:
+        aclient = AsyncAzureOpenAI(
+            api_key=kwargs["api_key"], api_version=kwargs["api_version"], azure_endpoint=kwargs["azure_endpoint"]
+        )
+    keys_to_remove = {
+        "_open_ai_rate_limit_requests_per_minute",
+        "_logger",
+        "_name",
+        "api_key",
+        "api_version",
+        "azure_endpoint",
+        "_rate_limiter",
+        "stream",
+        "api_type",
+    }
+    kwargs = {k: v for k, v in kwargs.items() if k not in keys_to_remove}
     perf_counter()
     # if logger:
     #     logger.info(f"[{name}][OpenAI API Request] {kwargs}")
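
A self-contained sketch of the Azure async path this enables (endpoint, key, and version are placeholders; on Azure, the `model` argument names your deployment):

```python
import asyncio

from openai import AsyncAzureOpenAI

async def demo():
    aclient = AsyncAzureOpenAI(
        api_key="<azure-api-key>",
        api_version="2024-02-01",
        azure_endpoint="https://my-resource.openai.azure.com/",
    )
    # By this point in the refactor, keys_to_remove has stripped the connection
    # and bookkeeping kwargs, so only SDK-legal arguments reach create().
    response = await aclient.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[{"role": "user", "content": "ping"}],
    )
    print(response.choices[0].message.content)

asyncio.run(demo())
```
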
@@ -239,38 +240,29 @@ def chat_completion_rl_inner(**kwargs):
     kwargs.get("_logger", None)
     kwargs.get("_name", None)
     rate_limiter = kwargs.get("_rate_limiter", None)
-    model = kwargs.get("model", "gpt-3.5-turbo")
-    if kwargs.get("config", None) and kwargs.get("config", None).llm.api_type == "azure":
-        model_details = AZURE_MODEL_DETAILS_MAP[model]
-        kwargs.pop("model", None)
-        kwargs["engine"] = model_details["engine"]
-        kwargs["api_key"] = model_details["api_key"]
-        kwargs["api_version"] = model_details["api_version"]
-        kwargs["api_base"] = model_details["api_base"]
-        kwargs["api_type"] = model_details["api_type"]
-        kwargs.pop("_open_ai_rate_limit_requests_per_minute", None)
-        kwargs["_open_ai_rate_limit_requests_per_minute"] = model_details["properties"][
-            "rate_limit_(Requests per minute)"
-        ]
-
-    # requests_per_minute = kwargs.get('_open_ai_rate_limit_requests_per_minute', 3000)
-    # delay_in_seconds = 60.0 / requests_per_minute
-    # time.sleep(delay_in_seconds)
-
-    client = OpenAI(api_key=kwargs["api_key"])
-    kwargs.pop("_open_ai_rate_limit_requests_per_minute", None)
-    kwargs.pop("_logger", None)
-    kwargs.pop("_name", None)
-    kwargs.pop("api_key", None)
-    kwargs.pop("_rate_limiter", None)
-    kwargs.pop("_rate_limiter", None)
-    kwargs.pop("stream", None)
-
+    api_type = kwargs.get("api_type", ApiType.openai)
+    if api_type == ApiType.openai:
+        client = OpenAI(api_key=kwargs["api_key"])
+    elif api_type == ApiType.azure:
+        client = AzureOpenAI(
+            api_key=kwargs["api_key"], api_version=kwargs["api_version"], azure_endpoint=kwargs["azure_endpoint"]
+        )
+    keys_to_remove = {
+        "_open_ai_rate_limit_requests_per_minute",
+        "_logger",
+        "_name",
+        "api_key",
+        "api_version",
+        "azure_endpoint",
+        "_rate_limiter",
+        "stream",
+        "api_type",
+    }
+    kwargs = {k: v for k, v in kwargs.items() if k not in keys_to_remove}
     perf_counter()
     # if logger:
     #     logger.info(f"[{name}][OpenAI API Request] {kwargs}")
     # pretty_print_chat_messages(kwargs['messages'])
 
     if rate_limiter:
         rate_limiter.consume(**kwargs)
     response = client.chat.completions.create(**kwargs)
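
The key-filtering step is easy to sanity-check in isolation; a quick standalone test of the dict-comprehension behavior:

```python
keys_to_remove = {
    "_open_ai_rate_limit_requests_per_minute", "_logger", "_name", "api_key",
    "api_version", "azure_endpoint", "_rate_limiter", "stream", "api_type",
}
kwargs = {"model": "gpt-4", "messages": [], "api_key": "k", "_logger": None, "stream": False}
filtered = {k: v for k, v in kwargs.items() if k not in keys_to_remove}
assert filtered == {"model": "gpt-4", "messages": []}  # only SDK arguments survive
```
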
24 changes: 12 additions & 12 deletions l2mac/llm_providers/openai.py
@@ -101,7 +101,7 @@
     },
 }
 
-rate_limits_per_tier_per_model = { # Updated from https://platform.openai.com/docs/guides/rate-limits/usage-tiers?context=tier-free on 16th April 2024
+openai_rate_limits_per_tier_per_model = { # Updated from https://platform.openai.com/docs/guides/rate-limits/usage-tiers?context=tier-free on 16th April 2024
     "free": {
         "gpt-3.5-turbo": {"RPM": 3, "RPD": 200, "TPM": 40000, "Batch Queue Limit": 200000},
         "text-embedding-3-small": {"RPM": 3, "RPD": 200, "TPM": 150000, "Batch Queue Limit": None},
@@ -122,10 +122,10 @@
         "dall-e-3": {"RPM": "5 img/min", "RPD": None, "TPM": None, "Batch Queue Limit": None},
     },
     "tier2": {
-        "gpt-4-turbo": {"RPM": "5,000", "TPM": "450,000", "Batch Queue Limit": "1,350,000"},
-        "gpt-4": {"RPM": "5,000", "TPM": "40,000", "Batch Queue Limit": "200,000"},
-        "gpt-3.5-turbo": {"RPM": "3,500", "TPM": "80,000", "Batch Queue Limit": "400,000"},
-        "text-embedding-3-large": {"RPM": "500", "TPM": "1,000,000", "Batch Queue Limit": None},
+        "gpt-4-turbo": {"RPM": 5000, "TPM": 450000, "Batch Queue Limit": 1350000},
+        "gpt-4": {"RPM": 5000, "TPM": 40000, "Batch Queue Limit": 200000},
+        "gpt-3.5-turbo": {"RPM": 3500, "TPM": 80000, "Batch Queue Limit": 400000},
+        "text-embedding-3-large": {"RPM": 500, "TPM": 1000000, "Batch Queue Limit": None},
         "whisper-1": {"RPM": "50", "TPM": None, "Batch Queue Limit": None},
         "tts-1": {"RPM": "50", "TPM": None, "Batch Queue Limit": None},
         "tts-1-hd": {"RPM": "5", "TPM": None, "Batch Queue Limit": None},
@@ -155,13 +155,13 @@
         "dall-e-3": {"RPM": "15 img/min", "TPM": None, "Batch Queue Limit": None},
     },
     "tier5": {
-        "gpt-4-turbo": {"RPM": "10,000", "TPM": "1,500,000", "Batch Queue Limit": "250,000,000"},
-        "gpt-4": {"RPM": "10,000", "TPM": "300,000", "Batch Queue Limit": "45,000,000"},
-        "gpt-3.5-turbo": {"RPM": "10,000", "TPM": "2,000,000", "Batch Queue Limit": "300,000,000"},
-        "text-embedding-3-large": {"RPM": "10,000", "TPM": "10,000,000", "Batch Queue Limit": None},
-        "whisper-1": {"RPM": "500", "TPM": None, "Batch Queue Limit": None},
-        "tts-1": {"RPM": "500", "TPM": None, "Batch Queue Limit": None},
-        "tts-1-hd": {"RPM": "20", "TPM": None, "Batch Queue Limit": None},
+        "gpt-4-turbo": {"RPM": 10000, "TPM": 1500000, "Batch Queue Limit": 250000000},
+        "gpt-4": {"RPM": 10000, "TPM": 300000, "Batch Queue Limit": 45000000},
+        "gpt-3.5-turbo": {"RPM": 10000, "TPM": 2000000, "Batch Queue Limit": 300000000},
+        "text-embedding-3-large": {"RPM": 10000, "TPM": 10000000, "Batch Queue Limit": None},
+        "whisper-1": {"RPM": 500, "TPM": None, "Batch Queue Limit": None},
+        "tts-1": {"RPM": 500, "TPM": None, "Batch Queue Limit": None},
+        "tts-1-hd": {"RPM": 20, "TPM": None, "Batch Queue Limit": None},
         "dall-e-2": {"RPM": "500 img/min", "TPM": None, "Batch Queue Limit": None},
         "dall-e-3": {"RPM": "50 img/min", "TPM": None, "Batch Queue Limit": None},
     },
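
Storing the limits as plain integers (rather than comma-formatted strings) lets callers do rate math without string parsing, e.g. the evenly-spaced request schedule sketched in the commented-out delay logic in general.py:

```python
from l2mac.llm_providers.openai import openai_rate_limits_per_tier_per_model

rpm = openai_rate_limits_per_tier_per_model["tier5"]["gpt-4-turbo"]["RPM"]  # 10000
delay_in_seconds = 60.0 / rpm  # 0.006 seconds between requests
print(rpm, delay_in_seconds)
```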
