fix: remove recursive call #8225

Open · wants to merge 2 commits into base: main
8 changes: 4 additions & 4 deletions litellm/llms/together_ai/chat.py
@@ -8,7 +8,8 @@

from typing import Optional

-from litellm import get_model_info, verbose_logger
+from litellm import verbose_logger
+from litellm.utils import get_model_param_support

from ..openai.chat.gpt_transformation import OpenAIGPTConfig

@@ -22,9 +23,8 @@ def get_supported_openai_params(self, model: str) -> list:
        """
        supports_function_calling: Optional[bool] = None
        try:
-            model_info = get_model_info(model, custom_llm_provider="together_ai")
-            supports_function_calling = model_info.get(
-                "supports_function_calling", False
+            supports_function_calling = get_model_param_support(
+                model, "supports_function_calling", custom_llm_provider="together_ai"
            )
        except Exception as e:
            verbose_logger.debug(f"Error getting supported openai params: {e}")
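In short, the Together AI config no longer calls get_model_info (the call path the PR title describes as recursive) and instead asks the new litellm.utils.get_model_param_support helper for the single supports_function_calling flag. A minimal sketch of the new lookup, assuming this PR's helper is installed; the standalone wrapper function below is illustrative, since the real code is a method on the Together AI chat config class:

from typing import Optional

from litellm import verbose_logger
from litellm.utils import get_model_param_support  # added by this PR


def together_ai_supports_function_calling(model: str) -> Optional[bool]:
    # Illustrative wrapper mirroring the updated get_supported_openai_params logic.
    supports_function_calling: Optional[bool] = None
    try:
        # Single-flag lookup against litellm.model_cost; no call back into
        # get_model_info, so the recursion this PR removes is avoided.
        supports_function_calling = get_model_param_support(
            model, "supports_function_calling", custom_llm_provider="together_ai"
        )
    except Exception as e:
        verbose_logger.debug(f"Error getting supported openai params: {e}")
    return supports_function_calling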
29 changes: 17 additions & 12 deletions litellm/model_prices_and_context_window_backup.json
@@ -1956,7 +1956,7 @@
},
"mistral/open-mixtral-8x22b": {
"max_tokens": 8191,
-"max_input_tokens": 64000,
+"max_input_tokens": 65336,
"max_output_tokens": 8191,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000006,
@@ -2038,7 +2038,7 @@
},
"deepseek/deepseek-reasoner": {
"max_tokens": 8192,
-"max_input_tokens": 64000,
+"max_input_tokens": 65536,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000055,
"input_cost_per_token_cache_hit": 0.00000014,
@@ -2051,9 +2051,9 @@
"supports_prompt_caching": true
},
"deepseek/deepseek-chat": {
-"max_tokens": 4096,
-"max_input_tokens": 128000,
-"max_output_tokens": 4096,
+"max_tokens": 8192,
+"max_input_tokens": 65536,
+"max_output_tokens": 8192,
"input_cost_per_token": 0.00000014,
"input_cost_per_token_cache_hit": 0.000000014,
"cache_read_input_token_cost": 0.000000014,
@@ -3064,7 +3064,8 @@
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_response_schema": true,
-"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-pro"
+"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-pro",
+"deprecation_date": "2025-09-24"
},
"gemini-1.5-pro-001": {
"max_tokens": 8192,
@@ -3265,7 +3266,9 @@
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
-"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-flash"
+"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-1.5-flash",
+"deprecation_date": "2025-09-24",
+"deprecation_date": "2025-09-24"
},
"gemini-1.5-flash-001": {
"max_tokens": 8192,
@@ -4128,7 +4131,8 @@
"supports_prompt_caching": true,
"tpm": 4000000,
"rpm": 2000,
-"source": "https://ai.google.dev/pricing"
+"source": "https://ai.google.dev/pricing",
+"deprecation_date": "2025-09-24"
},
"gemini/gemini-1.5-flash-001": {
"max_tokens": 8192,
@@ -4412,7 +4416,8 @@
"supports_prompt_caching": true,
"tpm": 4000000,
"rpm": 1000,
-"source": "https://ai.google.dev/pricing"
+"source": "https://ai.google.dev/pricing",
+"deprecation_date": "2025-09-24"
},
"gemini/gemini-1.5-pro-001": {
"max_tokens": 8192,
@@ -4837,7 +4842,7 @@
},
"openrouter/deepseek/deepseek-r1": {
"max_tokens": 8192,
-"max_input_tokens": 64000,
+"max_input_tokens": 65336,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000055,
"input_cost_per_token_cache_hit": 0.00000014,
@@ -4851,8 +4856,8 @@
},
"openrouter/deepseek/deepseek-chat": {
"max_tokens": 8192,
-"max_input_tokens": 66000,
-"max_output_tokens": 4096,
+"max_input_tokens": 65536,
+"max_output_tokens": 8192,
"input_cost_per_token": 0.00000014,
"output_cost_per_token": 0.00000028,
"litellm_provider": "openrouter",
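These JSON entries back litellm.model_cost, which is the map get_model_param_support and the token-limit checks read from. A quick, hedged way to sanity-check the updated DeepSeek limits once this change is installed (the model key is taken from the entries above):

import litellm

# model_cost is litellm's in-memory copy of the pricing/context-window map,
# loaded from this JSON (kept as a local backup of the published file).
entry = litellm.model_cost.get("deepseek/deepseek-chat", {})
print(entry.get("max_tokens"))         # expected: 8192 after this change
print(entry.get("max_input_tokens"))   # expected: 65536 after this change
print(entry.get("max_output_tokens"))  # expected: 8192 after this change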
24 changes: 19 additions & 5 deletions litellm/utils.py
@@ -419,7 +419,6 @@ def _custom_logger_class_exists_in_failure_callbacks(
def function_setup( # noqa: PLR0915
    original_function: str, rules_obj, start_time, *args, **kwargs
): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
-
### NOTICES ###
from litellm import Logging as LiteLLMLogging
from litellm.litellm_core_utils.litellm_logging import set_callbacks
@@ -4216,7 +4215,6 @@ def _get_model_info_helper( # noqa: PLR0915
):
_model_info = None
if _model_info is None and model in litellm.model_cost:
-
key = model
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
@@ -4227,23 +4225,20 @@ def _get_model_info_helper( # noqa: PLR0915
_model_info is None
and combined_stripped_model_name in litellm.model_cost
):
-
key = combined_stripped_model_name
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and stripped_model_name in litellm.model_cost:
-
key = stripped_model_name
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and split_model in litellm.model_cost:
-
key = split_model
_model_info = _get_model_info_from_model_cost(key=key)
if not _check_provider_match(
@@ -4362,6 +4357,25 @@
)


+def get_model_param_support(
+    model: str, param: str, custom_llm_provider: Optional[str]
+) -> bool:
+    model_info = litellm.model_cost.get(model)
+    potential_model_names = _get_potential_model_names(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    verbose_logger.debug(
+        f"checking potential_model_names in litellm.model_cost: {potential_model_names}"
+    )
+    # iterate over potential model names to get the model cost
+    for key, value in potential_model_names.items():
+        if value in litellm.model_cost and key != "custom_llm_provider":
+            return litellm.model_cost[value].get(param, False)
+
+    return False
+
+
def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
"""
Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token for a given model.
Expand Down