From d005138ce46584a1e67958003559b3ba12f8718c Mon Sep 17 00:00:00 2001
From: Bagatur
Date: Thu, 3 Oct 2024 10:21:11 -0700
Subject: [PATCH 01/13] core[patch]: add UsageMetadata details

---
 libs/core/langchain_core/messages/ai.py | 74 +++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 5 deletions(-)

diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py
index 03a22e79764f5..a95503fdc3966 100644
--- a/libs/core/langchain_core/messages/ai.py
+++ b/libs/core/langchain_core/messages/ai.py
@@ -2,7 +2,7 @@
 from typing import Any, Literal, Optional, Union
 
 from pydantic import model_validator
-from typing_extensions import Self, TypedDict
+from typing_extensions import NotRequired, Self, TypedDict
 
 from langchain_core.messages.base import (
     BaseMessage,
@@ -29,6 +29,62 @@
 from langchain_core.utils.json import parse_partial_json
 
 
+class InputTokenDetails(TypedDict, total=False):
+    """Breakdown of input token counts.
+
+    Does *not* need to sum to full input token count. Does *not* need to have all keys.
+
+    Example:
+
+        .. code-block:: python
+
+            {
+                "audio": 10,
+                "cache_creation": 200,
+                "cache_read": 100,
+            }
+    """
+
+    audio: int
+    """Audio input tokens."""
+    cache_creation: int
+    """Input tokens that were cached and there was a cache miss.
+
+    Since there was a cache miss, the cache was created from these tokens.
+    """
+    cache_read: int
+    """Input tokens that were cached and there was a cache hit.
+
+    Since there was a cache hit, the tokens were read from the cache. More precisely,
+    the model state given these tokens was read from the cache.
+    """
+
+
+class OutputTokenDetails(TypedDict, total=False):
+    """Breakdown of output token counts.
+
+    Does *not* need to sum to full output token count. Does *not* need to have all keys.
+
+    Example:
+
+        .. code-block:: python
+
+            {
+                "audio": 10,
+                "reasoning": 200,
+            }
+    """
+
+    audio: int
+    """Audio output tokens."""
+    reasoning: int
+    """Reasoning output tokens.
+
+    Tokens generated by the model in a chain of thought process (e.g., by OpenAI's o1
+    models) that are not returned as part of model output.
+    """
+
+
 class UsageMetadata(TypedDict):
     """Usage metadata for a message, such as token counts.
 
@@ -41,16 +97,24 @@ class UsageMetadata(TypedDict):
             {
                 "input_tokens": 10,
                 "output_tokens": 20,
-                "total_tokens": 30
+                "total_tokens": 30,
+                "input_token_details": {
+                    ...
+                },
+                "output_token_details": {
+                    ...
+                },
             }
     """
 
     input_tokens: int
-    """Count of input (or prompt) tokens."""
+    """Count of input (or prompt) tokens. Sum of all input token types."""
     output_tokens: int
-    """Count of output (or completion) tokens."""
+    """Count of output (or completion) tokens. Sum of all output token types."""
     total_tokens: int
-    """Total token count."""
+    """Total token count. 
Sum of input_tokens + output_tokens.""" + input_token_details: NotRequired[InputTokenDetails] + output_token_details: NotRequired[OutputTokenDetails] class AIMessage(BaseMessage): From 7ec41fc520d1f69158f054e0c52598c81d17681e Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 10:22:04 -0700 Subject: [PATCH 02/13] fmt --- libs/core/langchain_core/messages/ai.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index a95503fdc3966..6336cc9b9fb76 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -99,11 +99,14 @@ class UsageMetadata(TypedDict): "output_tokens": 20, "total_tokens": 30, "input_token_details": { - ... + "audio": 10, + "cache_creation": 200, + "cache_read": 100, }, "output_token_details": { - ... - }, + "audio": 10, + "reasoning": 200, + } } """ From 12414a0983c9f47b2fe4c44f46128c4a2fd05148 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 10:24:24 -0700 Subject: [PATCH 03/13] snapshots --- .../prompts/__snapshots__/test_chat.ambr | 156 ++++- .../runnables/__snapshots__/test_graph.ambr | 78 ++- .../__snapshots__/test_runnable.ambr | 624 +++++++++++++++++- 3 files changed, 847 insertions(+), 11 deletions(-) diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 5a235a0f9f933..85513fd122873 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -677,6 +677,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -743,6 +776,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. 
code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'SystemMessage': dict({ 'additionalProperties': True, 'description': ''' @@ -1247,14 +1308,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/$defs/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/$defs/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -2008,6 +2084,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -2074,6 +2183,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'SystemMessage': dict({ 'additionalProperties': True, 'description': ''' @@ -2578,14 +2715,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/$defs/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/$defs/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index aa70b9bb5a3e9..13ebdea6171e8 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1037,6 +1037,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. 
+ + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -1103,6 +1136,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'SystemMessage': dict({ 'additionalProperties': True, 'description': ''' @@ -1607,14 +1668,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/$defs/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/$defs/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 045b9eced274c..1df398b7298ad 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -2639,6 +2639,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -2705,6 +2738,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. 
code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'SystemMessage': dict({ 'additionalProperties': True, 'description': ''' @@ -3209,14 +3270,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/$defs/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/$defs/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -4028,6 +4104,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -4094,6 +4203,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'StringPromptValue': dict({ 'description': 'String prompt value.', 'properties': dict({ @@ -4617,14 +4754,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/$defs/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/$defs/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -5448,6 +5600,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. 
code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -5514,6 +5699,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'StringPromptValue': dict({ 'description': 'String prompt value.', 'properties': dict({ @@ -6037,14 +6250,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/definitions/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/definitions/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -6744,6 +6972,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -6810,6 +7071,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. 
code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'SystemMessage': dict({ 'additionalProperties': True, 'description': ''' @@ -7314,14 +7603,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/definitions/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/definitions/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -8175,6 +8479,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -8241,6 +8578,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'StringPromptValue': dict({ 'description': 'String prompt value.', 'properties': dict({ @@ -8764,14 +9129,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/definitions/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/definitions/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -9516,6 +9896,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. 
code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -9582,6 +9995,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'SystemMessage': dict({ 'additionalProperties': True, 'description': ''' @@ -10086,14 +10527,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/definitions/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/definitions/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -10855,6 +11311,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -10921,6 +11410,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. 
code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'PromptTemplateOutput': dict({ 'anyOf': list([ dict({ @@ -11455,14 +11972,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/definitions/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/definitions/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -12236,6 +12768,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -12302,6 +12867,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'StringPromptValue': dict({ 'description': 'String prompt value.', 'properties': dict({ @@ -12825,14 +13418,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/definitions/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/definitions/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', From b61aae55d3295e7cc4e764b1a1d0fbf0c5c3e0f4 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 10:25:45 -0700 Subject: [PATCH 04/13] doc nit --- libs/core/langchain_core/messages/ai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 6336cc9b9fb76..07e90b735f04c 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -95,9 +95,9 @@ class UsageMetadata(TypedDict): .. 
code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, From 34a83d9bbb34ff5661b27d13678273b918f18f86 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 10:26:39 -0700 Subject: [PATCH 05/13] snapshot --- .../prompts/__snapshots__/test_chat.ambr | 12 ++--- .../runnables/__snapshots__/test_graph.ambr | 6 +-- .../__snapshots__/test_runnable.ambr | 48 +++++++++---------- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 85513fd122873..72734872c2c29 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -1306,9 +1306,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -2713,9 +2713,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index 13ebdea6171e8..2cd1e6aea7aa4 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1666,9 +1666,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 1df398b7298ad..4f53b88c6bec2 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -3268,9 +3268,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -4752,9 +4752,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -6248,9 +6248,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -7601,9 +7601,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -9127,9 +9127,9 @@ .. 
code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -10525,9 +10525,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -11970,9 +11970,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -13416,9 +13416,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, From 5dd10ed87df87f1f015b32bac2fb1e5c96d818a6 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 11:23:54 -0700 Subject: [PATCH 06/13] openai[patch]: add usage metadata details --- .../langchain_openai/chat_models/base.py | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 21754937d180f..d2cc2d3883aab 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -63,7 +63,8 @@ ToolMessage, ToolMessageChunk, ) -from langchain_core.messages.ai import UsageMetadata +from langchain_core.messages.ai import UsageMetadata, InputTokenDetails, \ + OutputTokenDetails from langchain_core.messages.tool import tool_call_chunk from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser from langchain_core.output_parsers.openai_tools import ( @@ -284,13 +285,33 @@ def _convert_delta_to_message_chunk( def _convert_chunk_to_generation_chunk( chunk: dict, default_chunk_class: Type, base_generation_info: Optional[Dict] ) -> Optional[ChatGenerationChunk]: - token_usage = chunk.get("usage") + token_usage = chunk.get("usage", {}) choices = chunk.get("choices", []) + + input_tokens = token_usage.get("prompt_tokens", 0) + output_tokens = token_usage.get("completion_tokens", 0) + total_tokens = token_usage.get("total_tokens", input_tokens + output_tokens) + input_token_details = { + "audio": token_usage.get("prompt_tokens_details", {}).get("audio_tokens"), + "cache_read": token_usage.get("prompt_tokens_details", {}).get("cached_tokens"), + } + output_token_details = { + "audio": token_usage.get("completion_tokens_details", {}).get("audio_tokens"), + "reasoning": token_usage.get("completion_token_details", {}).get( + "reasoning_tokens" + ), + } usage_metadata: Optional[UsageMetadata] = ( UsageMetadata( - input_tokens=token_usage.get("prompt_tokens", 0), - output_tokens=token_usage.get("completion_tokens", 0), - total_tokens=token_usage.get("total_tokens", 0), + input_tokens=input_tokens, + output_tokens=output_tokens, + total_tokens=total_tokens, + input_token_details=InputTokenDetails(**{ + k: v for k, v in input_token_details.items() if v is not None + }), + output_token_details=OutputTokenDetails(**{ + k: v for k, v in output_token_details.items() if v is not None + }), ) if token_usage else None From 6833c6d92bf90d4962ba3a77244efe3f3ed1ebe9 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 
11:58:26 -0700 Subject: [PATCH 07/13] wip --- .../langchain_openai/chat_models/base.py | 76 ++++++++++--------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index d2cc2d3883aab..c797f31303940 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -63,8 +63,11 @@ ToolMessage, ToolMessageChunk, ) -from langchain_core.messages.ai import UsageMetadata, InputTokenDetails, \ - OutputTokenDetails +from langchain_core.messages.ai import ( + InputTokenDetails, + OutputTokenDetails, + UsageMetadata, +) from langchain_core.messages.tool import tool_call_chunk from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser from langchain_core.output_parsers.openai_tools import ( @@ -285,38 +288,12 @@ def _convert_delta_to_message_chunk( def _convert_chunk_to_generation_chunk( chunk: dict, default_chunk_class: Type, base_generation_info: Optional[Dict] ) -> Optional[ChatGenerationChunk]: - token_usage = chunk.get("usage", {}) + token_usage = chunk.get("usage") choices = chunk.get("choices", []) - input_tokens = token_usage.get("prompt_tokens", 0) - output_tokens = token_usage.get("completion_tokens", 0) - total_tokens = token_usage.get("total_tokens", input_tokens + output_tokens) - input_token_details = { - "audio": token_usage.get("prompt_tokens_details", {}).get("audio_tokens"), - "cache_read": token_usage.get("prompt_tokens_details", {}).get("cached_tokens"), - } - output_token_details = { - "audio": token_usage.get("completion_tokens_details", {}).get("audio_tokens"), - "reasoning": token_usage.get("completion_token_details", {}).get( - "reasoning_tokens" - ), - } usage_metadata: Optional[UsageMetadata] = ( - UsageMetadata( - input_tokens=input_tokens, - output_tokens=output_tokens, - total_tokens=total_tokens, - input_token_details=InputTokenDetails(**{ - k: v for k, v in input_token_details.items() if v is not None - }), - output_token_details=OutputTokenDetails(**{ - k: v for k, v in output_token_details.items() if v is not None - }), - ) - if token_usage - else None + _create_usage_metadata(token_usage) if token_usage else None ) - if len(choices) == 0: # logprobs is implicitly None generation_chunk = ChatGenerationChunk( @@ -742,15 +719,11 @@ def _create_chat_result( if response_dict.get("error"): raise ValueError(response_dict.get("error")) - token_usage = response_dict.get("usage", {}) + token_usage = response_dict.get("usage") for res in response_dict["choices"]: message = _convert_dict_to_message(res["message"]) if token_usage and isinstance(message, AIMessage): - message.usage_metadata = { - "input_tokens": token_usage.get("prompt_tokens", 0), - "output_tokens": token_usage.get("completion_tokens", 0), - "total_tokens": token_usage.get("total_tokens", 0), - } + message.usage_metadata = _create_usage_metadata(token_usage) generation_info = generation_info or {} generation_info["finish_reason"] = ( res.get("finish_reason") @@ -2181,3 +2154,34 @@ class OpenAIRefusalError(Exception): .. 
versionadded:: 0.1.21 """ + + +def _create_usage_metadata(oai_token_usage: dict) -> UsageMetadata: + input_tokens = oai_token_usage.get("prompt_tokens", 0) + output_tokens = oai_token_usage.get("completion_tokens", 0) + total_tokens = oai_token_usage.get("total_tokens", input_tokens + output_tokens) + input_token_details = { + "audio": oai_token_usage.get("prompt_tokens_details", {}).get("audio_tokens"), + "cache_read": oai_token_usage.get("prompt_tokens_details", {}).get( + "cached_tokens" + ), + } + output_token_details = { + "audio": oai_token_usage.get("completion_tokens_details", {}).get( + "audio_tokens" + ), + "reasoning": oai_token_usage.get("completion_token_details", {}).get( + "reasoning_tokens" + ), + } + return UsageMetadata( + input_tokens=input_tokens, + output_tokens=output_tokens, + total_tokens=total_tokens, + input_token_details=InputTokenDetails( + **{k: v for k, v in input_token_details.items() if v is not None} + ), + output_token_details=OutputTokenDetails( + **{k: v for k, v in output_token_details.items() if v is not None} + ), + ) From ca3a1032be1d4e8e4b01cdc2775bbbcf73ce7ef1 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 12:46:58 -0700 Subject: [PATCH 08/13] fmt --- .../integration_tests/chat_models.py | 65 +++++++++++++++++++ .../unit_tests/chat_models.py | 4 ++ 2 files changed, 69 insertions(+) diff --git a/libs/standard-tests/langchain_standard_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_standard_tests/integration_tests/chat_models.py index c5ac855cdc6a2..1da209095ef40 100644 --- a/libs/standard-tests/langchain_standard_tests/integration_tests/chat_models.py +++ b/libs/standard-tests/langchain_standard_tests/integration_tests/chat_models.py @@ -151,6 +151,31 @@ def test_usage_metadata(self, model: BaseChatModel) -> None: assert isinstance(result.usage_metadata["output_tokens"], int) assert isinstance(result.usage_metadata["total_tokens"], int) + if "audio_input" in self.supported_usage_metadata_details: + msg = self.invoke_with_audio_input() + assert isinstance(msg.usage_metadata["input_token_details"]["audio"], int) # type: ignore[index] + if "audio_output" in self.supported_usage_metadata_details: + msg = self.invoke_with_audio_output() + assert isinstance(msg.usage_metadata["output_token_details"]["audio"], int) # type: ignore[index] + if "reasoning_output" in self.supported_usage_metadata_details: + msg = self.invoke_with_reasoning_output() + assert isinstance( + msg.usage_metadata["output_token_details"]["reasoning"], # type: ignore[index] + int, + ) + if "cache_read_input" in self.supported_usage_metadata_details: + msg = self.invoke_with_cache_read_input() + assert isinstance( + msg.usage_metadata["input_token_details"]["cache_read"], # type: ignore[index] + int, + ) + if "cache_creation_input" in self.supported_usage_metadata_details: + msg = self.invoke_with_cache_creation_input() + assert isinstance( + msg.usage_metadata["input_token_details"]["cache_creation"], # type: ignore[index] + int, + ) + def test_usage_metadata_streaming(self, model: BaseChatModel) -> None: if not self.returns_usage_metadata: pytest.skip("Not implemented.") @@ -164,6 +189,31 @@ def test_usage_metadata_streaming(self, model: BaseChatModel) -> None: assert isinstance(full.usage_metadata["output_tokens"], int) assert isinstance(full.usage_metadata["total_tokens"], int) + if "audio_input" in self.supported_usage_metadata_details: + msg = self.invoke_with_audio_input(stream=True) + assert 
isinstance(msg.usage_metadata["input_token_details"]["audio"], int) # type: ignore[index] + if "audio_output" in self.supported_usage_metadata_details: + msg = self.invoke_with_audio_output(stream=True) + assert isinstance(msg.usage_metadata["output_token_details"]["audio"], int) # type: ignore[index] + if "reasoning_output" in self.supported_usage_metadata_details: + msg = self.invoke_with_reasoning_output(stream=True) + assert isinstance( + msg.usage_metadata["output_token_details"]["reasoning"], # type: ignore[index] + int, + ) + if "cache_read_input" in self.supported_usage_metadata_details: + msg = self.invoke_with_cache_read_input(stream=True) + assert isinstance( + msg.usage_metadata["input_token_details"]["cache_read"], # type: ignore[index] + int, + ) + if "cache_creation_input" in self.supported_usage_metadata_details: + msg = self.invoke_with_cache_creation_input(stream=True) + assert isinstance( + msg.usage_metadata["input_token_details"]["cache_creation"], # type: ignore[index] + int, + ) + def test_stop_sequence(self, model: BaseChatModel) -> None: result = model.invoke("hi", stop=["you"]) assert isinstance(result, AIMessage) @@ -608,3 +658,18 @@ def test_message_with_name(self, model: BaseChatModel) -> None: assert isinstance(result, AIMessage) assert isinstance(result.content, str) assert len(result.content) > 0 + + def invoke_with_audio_input(self, *, stream: bool = False) -> AIMessage: + raise NotImplementedError() + + def invoke_with_audio_output(self, *, stream: bool = False) -> AIMessage: + raise NotImplementedError() + + def invoke_with_reasoning_output(self, *, stream: bool = False) -> AIMessage: + raise NotImplementedError() + + def invoke_with_cache_read_input(self, *, stream: bool = False) -> AIMessage: + raise NotImplementedError() + + def invoke_with_cache_creation_input(self, *, stream: bool = False) -> AIMessage: + raise NotImplementedError() diff --git a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py index 1a611f1800f5c..d3df72166071c 100644 --- a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py +++ b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py @@ -138,6 +138,10 @@ def supports_anthropic_inputs(self) -> bool: def supports_image_tool_message(self) -> bool: return False + @property + def supported_usage_metadata_details(self) -> List[str]: + return [] + class ChatModelUnitTests(ChatModelTests): @property From 78161ad9117a05a16255e37897cb359829d45361 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 13:11:34 -0700 Subject: [PATCH 09/13] fmt --- .../langchain_openai/chat_models/base.py | 6 +-- .../chat_models/test_base_standard.py | 51 ++++++++++++++++++- .../unit_tests/chat_models.py | 12 ++++- 3 files changed, 64 insertions(+), 5 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index c797f31303940..27d4adf06817a 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -2160,17 +2160,17 @@ def _create_usage_metadata(oai_token_usage: dict) -> UsageMetadata: input_tokens = oai_token_usage.get("prompt_tokens", 0) output_tokens = oai_token_usage.get("completion_tokens", 0) total_tokens = oai_token_usage.get("total_tokens", input_tokens + output_tokens) - input_token_details = { + input_token_details: dict = { "audio": 
oai_token_usage.get("prompt_tokens_details", {}).get("audio_tokens"), "cache_read": oai_token_usage.get("prompt_tokens_details", {}).get( "cached_tokens" ), } - output_token_details = { + output_token_details: dict = { "audio": oai_token_usage.get("completion_tokens_details", {}).get( "audio_tokens" ), - "reasoning": oai_token_usage.get("completion_token_details", {}).get( + "reasoning": oai_token_usage.get("completion_tokens_details", {}).get( "reasoning_tokens" ), } diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py index 22b305c9753d4..b91b590ad7cd9 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py @@ -1,12 +1,16 @@ """Standard LangChain interface tests""" -from typing import Type +from pathlib import Path +from typing import List, Literal, Type, cast from langchain_core.language_models import BaseChatModel +from langchain_core.messages import AIMessage from langchain_standard_tests.integration_tests import ChatModelIntegrationTests from langchain_openai import ChatOpenAI +REPO_ROOT_DIR = Path(__file__).parents[6] + class TestOpenAIStandard(ChatModelIntegrationTests): @property @@ -20,3 +24,48 @@ def chat_model_params(self) -> dict: @property def supports_image_inputs(self) -> bool: return True + + @property + def supported_usage_metadata_details( + self, + ) -> List[ + Literal[ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ] + ]: + return ["reasoning_output", "cache_read_input"] + + def invoke_with_cache_read_input(self, *, stream: bool = False) -> AIMessage: + with open(REPO_ROOT_DIR / "README.md", "r") as f: + readme = f.read() + + input_ = f"""What's langchain? 
Here's the langchain README: + + {readme} + """ + llm = ChatOpenAI(model="gpt-4o-mini", stream_usage=True) + _invoke(llm, input_, stream) + # invoke twice so first invocation is cached + return _invoke(llm, input_, stream) + + def invoke_with_reasoning_output(self, *, stream: bool = False) -> AIMessage: + llm = ChatOpenAI(model="o1-mini", stream_usage=True, temperature=1) + input_ = ( + "explain the relationship between the 2008/9 economic crisis and the " + "startup ecosystem in the early 2010s" + ) + return _invoke(llm, input_, stream) + + +def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage: + if stream: + full = None + for chunk in llm.stream(input_): + full = full + chunk if full else chunk # type: ignore[operator] + return cast(AIMessage, full) + else: + return cast(AIMessage, llm.invoke(input_)) diff --git a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py index d3df72166071c..1298c022852cf 100644 --- a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py +++ b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py @@ -139,7 +139,17 @@ def supports_image_tool_message(self) -> bool: return False @property - def supported_usage_metadata_details(self) -> List[str]: + def supported_usage_metadata_details( + self, + ) -> List[ + Literal[ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ] + ]: return [] From 3d4437a502459935b206b1416498c93364f9adf6 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 13:13:41 -0700 Subject: [PATCH 10/13] fmt --- .../unit_tests/chat_models.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py index d3df72166071c..1298c022852cf 100644 --- a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py +++ b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py @@ -139,7 +139,17 @@ def supports_image_tool_message(self) -> bool: return False @property - def supported_usage_metadata_details(self) -> List[str]: + def supported_usage_metadata_details( + self, + ) -> List[ + Literal[ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ] + ]: return [] From 05829ed185937c4d07d9b449c62fa16357df2068 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 13:18:02 -0700 Subject: [PATCH 11/13] docs --- libs/core/langchain_core/messages/ai.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 07e90b735f04c..a86c51b0664c9 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -43,6 +43,8 @@ class InputTokenDetails(TypedDict, total=False): "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 """ audio: int @@ -73,6 +75,8 @@ class OutputTokenDetails(TypedDict, total=False): "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 """ audio: int @@ -108,6 +112,10 @@ class UsageMetadata(TypedDict): "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. """ input_tokens: int @@ -117,7 +125,15 @@ class UsageMetadata(TypedDict): total_tokens: int """Total token count. 
Sum of input_tokens + output_tokens.""" input_token_details: NotRequired[InputTokenDetails] + """Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + """ output_token_details: NotRequired[OutputTokenDetails] + """Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + """ class AIMessage(BaseMessage): From 2d4b4a9ab8d9d7b8a5a84c699cffefa5cafacc9b Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 13:24:41 -0700 Subject: [PATCH 12/13] fmt --- libs/core/langchain_core/messages/ai.py | 4 ++-- libs/core/pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index a86c51b0664c9..dece1e575eeb6 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -126,12 +126,12 @@ class UsageMetadata(TypedDict): """Total token count. Sum of input_tokens + output_tokens.""" input_token_details: NotRequired[InputTokenDetails] """Breakdown of input token counts. - + Does *not* need to sum to full input token count. Does *not* need to have all keys. """ output_token_details: NotRequired[OutputTokenDetails] """Breakdown of output token counts. - + Does *not* need to sum to full output token count. Does *not* need to have all keys. """ diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 11a2e85aa376c..20dbf792c952d 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -45,7 +45,7 @@ python = ">=3.12.4" [tool.ruff.lint] select = [ "B", "C4", "E", "F", "I", "N", "PIE", "SIM", "T201", "UP", "W",] -ignore = [ "UP007",] +ignore = [ "UP007", 'W293'] [tool.coverage.run] omit = [ "tests/*",] From 4854a0b9331ac780d40985d1aa791b284077e34b Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 13:28:00 -0700 Subject: [PATCH 13/13] snapshot --- .../prompts/__snapshots__/test_chat.ambr | 16 +++++ .../runnables/__snapshots__/test_graph.ambr | 8 +++ .../__snapshots__/test_runnable.ambr | 64 +++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 72734872c2c29..7e35bd6c46548 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -692,6 +692,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -790,6 +792,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -1319,6 +1323,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -2099,6 +2107,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -2197,6 +2207,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -2726,6 +2738,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. 
''', 'properties': dict({ 'input_token_details': dict({ diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index 2cd1e6aea7aa4..ba9d742b37407 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1052,6 +1052,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -1150,6 +1152,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -1679,6 +1683,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 4f53b88c6bec2..85b53552716a2 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -2654,6 +2654,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -2752,6 +2754,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -3281,6 +3285,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -4119,6 +4127,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -4217,6 +4227,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -4765,6 +4777,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -5615,6 +5631,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -5713,6 +5731,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -6261,6 +6281,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -6987,6 +7011,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -7085,6 +7111,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -7614,6 +7642,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -8494,6 +8526,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -8592,6 +8626,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -9140,6 +9176,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -9911,6 +9951,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. 
versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -10009,6 +10051,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -10538,6 +10582,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -11326,6 +11374,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -11424,6 +11474,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -11983,6 +12035,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -12783,6 +12839,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -12881,6 +12939,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -13429,6 +13489,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({