diff --git a/litellm/llms/together_ai/chat.py b/litellm/llms/together_ai/chat.py
index 06d33f697502..79671e1f50a4 100644
--- a/litellm/llms/together_ai/chat.py
+++ b/litellm/llms/together_ai/chat.py
@@ -8,7 +8,8 @@
 
 from typing import Optional
 
-from litellm import get_model_info, verbose_logger
+from litellm import verbose_logger
+from litellm.utils import get_model_param_support
 
 from ..openai.chat.gpt_transformation import OpenAIGPTConfig
 
@@ -22,9 +23,8 @@ def get_supported_openai_params(self, model: str) -> list:
         """
         supports_function_calling: Optional[bool] = None
         try:
-            model_info = get_model_info(model, custom_llm_provider="together_ai")
-            supports_function_calling = model_info.get(
-                "supports_function_calling", False
+            supports_function_calling = get_model_param_support(
+                model, "supports_function_calling", custom_llm_provider="together_ai"
             )
         except Exception as e:
             verbose_logger.debug(f"Error getting supported openai params: {e}")
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index ae3117497f87..77dee5b3077a 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1956,7 +1956,7 @@
     },
     "mistral/open-mixtral-8x22b": {
         "max_tokens": 8191,
-        "max_input_tokens": 64000,
+        "max_input_tokens": 65336,
         "max_output_tokens": 8191,
         "input_cost_per_token": 0.000002,
         "output_cost_per_token": 0.000006,
@@ -2038,7 +2038,7 @@
     },
     "deepseek/deepseek-reasoner": {
         "max_tokens": 8192,
-        "max_input_tokens": 64000,
+        "max_input_tokens": 65536,
         "max_output_tokens": 8192,
         "input_cost_per_token": 0.00000055,
         "input_cost_per_token_cache_hit": 0.00000014,
@@ -2051,9 +2051,9 @@
         "supports_prompt_caching": true
     },
     "deepseek/deepseek-chat": {
-        "max_tokens": 4096,
-        "max_input_tokens": 128000,
-        "max_output_tokens": 4096,
+        "max_tokens": 8192,
+        "max_input_tokens": 65536,
+        "max_output_tokens": 8192,
         "input_cost_per_token": 0.00000014,
         "input_cost_per_token_cache_hit": 0.000000014,
         "cache_read_input_token_cost": 0.000000014,
@@ -3064,7 +3064,8 @@
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "supports_response_schema": true,
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-pro"
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-pro",
+        "deprecation_date": "2025-09-24"
     },
     "gemini-1.5-pro-001": {
         "max_tokens": 8192,
@@ -3265,7 +3266,8 @@
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-flash"
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-flash",
+        "deprecation_date": "2025-09-24"
     },
     "gemini-1.5-flash-001": {
         "max_tokens": 8192,
@@ -4128,7 +4130,8 @@
         "supports_prompt_caching": true,
         "tpm": 4000000,
         "rpm": 2000,
-        "source": "https://ai.google.dev/pricing"
+        "source": "https://ai.google.dev/pricing",
+        "deprecation_date": "2025-09-24"
     },
     "gemini/gemini-1.5-flash-001": {
         "max_tokens": 8192,
@@ -4412,7 +4415,8 @@
         "supports_prompt_caching": true,
         "tpm": 4000000,
         "rpm": 1000,
-        "source": "https://ai.google.dev/pricing"
+        "source": "https://ai.google.dev/pricing",
+        "deprecation_date": "2025-09-24"
     },
     "gemini/gemini-1.5-pro-001": {
         "max_tokens": 8192,
@@ -4837,7 +4841,7 @@
     },
     "openrouter/deepseek/deepseek-r1": {
         "max_tokens": 8192,
-        "max_input_tokens": 64000,
+        "max_input_tokens": 65336,
         "max_output_tokens": 8192,
         "input_cost_per_token": 0.00000055,
         "input_cost_per_token_cache_hit": 0.00000014,
@@ -4851,8 +4855,8 @@
     },
     "openrouter/deepseek/deepseek-chat": {
         "max_tokens": 8192,
-        "max_input_tokens": 66000,
-        "max_output_tokens": 4096,
+        "max_input_tokens": 65536,
+        "max_output_tokens": 8192,
         "input_cost_per_token": 0.00000014,
         "output_cost_per_token": 0.00000028,
         "litellm_provider": "openrouter",
diff --git a/litellm/utils.py b/litellm/utils.py
index 9e5e8c8cba95..e0115af797ba 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -419,7 +419,6 @@ def _custom_logger_class_exists_in_failure_callbacks(
 def function_setup(  # noqa: PLR0915
     original_function: str, rules_obj, start_time, *args, **kwargs
 ):  # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
-    ### NOTICES ###
     from litellm import Logging as LiteLLMLogging
     from litellm.litellm_core_utils.litellm_logging import set_callbacks
 
@@ -4216,7 +4215,6 @@ def _get_model_info_helper(  # noqa: PLR0915
             ):
                 _model_info = None
         if _model_info is None and model in litellm.model_cost:
-            key = model
             _model_info = _get_model_info_from_model_cost(key=key)
             if not _check_provider_match(
                 model_info=_model_info, custom_llm_provider=custom_llm_provider
@@ -4227,7 +4225,6 @@
             _model_info is None
             and combined_stripped_model_name in litellm.model_cost
         ):
-            key = combined_stripped_model_name
             _model_info = _get_model_info_from_model_cost(key=key)
             if not _check_provider_match(
                 model_info=_model_info, custom_llm_provider=custom_llm_provider
@@ -4235,7 +4232,6 @@
             ):
                 _model_info = None
         if _model_info is None and stripped_model_name in litellm.model_cost:
-            key = stripped_model_name
             _model_info = _get_model_info_from_model_cost(key=key)
             if not _check_provider_match(
                 model_info=_model_info, custom_llm_provider=custom_llm_provider
@@ -4243,7 +4239,6 @@
             ):
                 _model_info = None
         if _model_info is None and split_model in litellm.model_cost:
-            key = split_model
             _model_info = _get_model_info_from_model_cost(key=key)
             if not _check_provider_match(
                 model_info=_model_info, custom_llm_provider=custom_llm_provider
@@ -4362,6 +4357,25 @@
         )
 
 
+def get_model_param_support(
+    model: str, param: str, custom_llm_provider: Optional[str]
+) -> bool:
+    model_info = litellm.model_cost.get(model)
+    potential_model_names = _get_potential_model_names(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    verbose_logger.debug(
+        f"checking potential_model_names in litellm.model_cost: {potential_model_names}"
+    )
+    # iterate over potential model names to get the model cost
+    for key, value in potential_model_names.items():
+        if value in litellm.model_cost and key != "custom_llm_provider":
+            return litellm.model_cost[value].get(param, False)
+
+    return False
+
+
 def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
     """
     Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token for a given model.