From d005138ce46584a1e67958003559b3ba12f8718c Mon Sep 17 00:00:00 2001
From: Bagatur
Date: Thu, 3 Oct 2024 10:21:11 -0700
Subject: [PATCH 01/13] core[patch]: add UsageMetadata details

---
 libs/core/langchain_core/messages/ai.py | 74 +++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 5 deletions(-)

diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py
index 03a22e79764f5..a95503fdc3966 100644
--- a/libs/core/langchain_core/messages/ai.py
+++ b/libs/core/langchain_core/messages/ai.py
@@ -2,7 +2,7 @@
 from typing import Any, Literal, Optional, Union
 
 from pydantic import model_validator
-from typing_extensions import Self, TypedDict
+from typing_extensions import NotRequired, Self, TypedDict
 
 from langchain_core.messages.base import (
     BaseMessage,
@@ -29,6 +29,62 @@
 from langchain_core.utils.json import parse_partial_json
 
 
+class InputTokenDetails(TypedDict, total=False):
+    """Breakdown of input token counts.
+
+    Does *not* need to sum to full input token count. Does *not* need to have all keys.
+
+    Example:
+
+        .. code-block:: python
+
+            {
+                "audio": 10,
+                "cache_creation": 200,
+                "cache_read": 100,
+            }
+    """
+
+    audio: int
+    """Audio input tokens."""
+    cache_creation: int
+    """Input tokens that were cached and there was a cache miss.
+
+    Since there was a cache miss, the cache was created from these tokens.
+    """
+    cache_read: int
+    """Input tokens that were cached and there was a cache hit.
+
+    Since there was a cache hit, the tokens were read from the cache. More precisely,
+    the model state given these tokens was read from the cache.
+    """
+
+
+class OutputTokenDetails(TypedDict, total=False):
+    """Breakdown of output token counts.
+
+    Does *not* need to sum to full output token count. Does *not* need to have all keys.
+
+    Example:
+
+        .. code-block:: python
+
+            {
+                "audio": 10,
+                "reasoning": 200,
+            }
+    """
+
+    audio: int
+    """Audio output tokens."""
+    reasoning: int
+    """Reasoning output tokens.
+
+    Tokens generated by the model in a chain of thought process (e.g., by OpenAI's o1
+    models) that are not returned as part of model output.
+    """
+
+
 class UsageMetadata(TypedDict):
     """Usage metadata for a message, such as token counts.
 
@@ -41,16 +97,24 @@ class UsageMetadata(TypedDict):
             {
                 "input_tokens": 10,
                 "output_tokens": 20,
-                "total_tokens": 30
+                "total_tokens": 30,
+                "input_token_details": {
+                    ...
+                },
+                "output_token_details": {
+                    ...
+                },
             }
     """
 
     input_tokens: int
-    """Count of input (or prompt) tokens."""
+    """Count of input (or prompt) tokens. Sum of all input token types."""
     output_tokens: int
-    """Count of output (or completion) tokens."""
+    """Count of output (or completion) tokens. Sum of all output token types."""
     total_tokens: int
-    """Total token count."""
+    """Total token count. 
Sum of input_tokens + output_tokens.""" + input_token_details: NotRequired[InputTokenDetails] + output_token_details: NotRequired[OutputTokenDetails] class AIMessage(BaseMessage): From 7ec41fc520d1f69158f054e0c52598c81d17681e Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 10:22:04 -0700 Subject: [PATCH 02/13] fmt --- libs/core/langchain_core/messages/ai.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index a95503fdc3966..6336cc9b9fb76 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -99,11 +99,14 @@ class UsageMetadata(TypedDict): "output_tokens": 20, "total_tokens": 30, "input_token_details": { - ... + "audio": 10, + "cache_creation": 200, + "cache_read": 100, }, "output_token_details": { - ... - }, + "audio": 10, + "reasoning": 200, + } } """ From 12414a0983c9f47b2fe4c44f46128c4a2fd05148 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 10:24:24 -0700 Subject: [PATCH 03/13] snapshots --- .../prompts/__snapshots__/test_chat.ambr | 156 ++++- .../runnables/__snapshots__/test_graph.ambr | 78 ++- .../__snapshots__/test_runnable.ambr | 624 +++++++++++++++++- 3 files changed, 847 insertions(+), 11 deletions(-) diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 5a235a0f9f933..85513fd122873 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -677,6 +677,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -743,6 +776,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. 
code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'SystemMessage': dict({ 'additionalProperties': True, 'description': ''' @@ -1247,14 +1308,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/$defs/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/$defs/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -2008,6 +2084,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -2074,6 +2183,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'SystemMessage': dict({ 'additionalProperties': True, 'description': ''' @@ -2578,14 +2715,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/$defs/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/$defs/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index aa70b9bb5a3e9..13ebdea6171e8 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1037,6 +1037,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. 
+ + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -1103,6 +1136,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'SystemMessage': dict({ 'additionalProperties': True, 'description': ''' @@ -1607,14 +1668,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/$defs/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/$defs/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 045b9eced274c..1df398b7298ad 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -2639,6 +2639,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -2705,6 +2738,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. 
code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'SystemMessage': dict({ 'additionalProperties': True, 'description': ''' @@ -3209,14 +3270,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/$defs/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/$defs/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -4028,6 +4104,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -4094,6 +4203,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'StringPromptValue': dict({ 'description': 'String prompt value.', 'properties': dict({ @@ -4617,14 +4754,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/$defs/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/$defs/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -5448,6 +5600,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. 
code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -5514,6 +5699,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'StringPromptValue': dict({ 'description': 'String prompt value.', 'properties': dict({ @@ -6037,14 +6250,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/definitions/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/definitions/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -6744,6 +6972,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -6810,6 +7071,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. 
code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'SystemMessage': dict({ 'additionalProperties': True, 'description': ''' @@ -7314,14 +7603,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/definitions/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/definitions/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -8175,6 +8479,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -8241,6 +8578,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'StringPromptValue': dict({ 'description': 'String prompt value.', 'properties': dict({ @@ -8764,14 +9129,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/definitions/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/definitions/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -9516,6 +9896,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. 
code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -9582,6 +9995,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'SystemMessage': dict({ 'additionalProperties': True, 'description': ''' @@ -10086,14 +10527,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/definitions/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/definitions/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -10855,6 +11311,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -10921,6 +11410,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. 
code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'PromptTemplateOutput': dict({ 'anyOf': list([ dict({ @@ -11455,14 +11972,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/definitions/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/definitions/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', @@ -12236,6 +12768,39 @@ 'title': 'HumanMessageChunk', 'type': 'object', }), + 'InputTokenDetails': dict({ + 'description': ''' + Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'cache_creation': dict({ + 'title': 'Cache Creation', + 'type': 'integer', + }), + 'cache_read': dict({ + 'title': 'Cache Read', + 'type': 'integer', + }), + }), + 'title': 'InputTokenDetails', + 'type': 'object', + }), 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. @@ -12302,6 +12867,34 @@ 'title': 'InvalidToolCall', 'type': 'object', }), + 'OutputTokenDetails': dict({ + 'description': ''' + Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + + Example: + + .. code-block:: python + + { + "audio": 10, + "reasoning": 200, + } + ''', + 'properties': dict({ + 'audio': dict({ + 'title': 'Audio', + 'type': 'integer', + }), + 'reasoning': dict({ + 'title': 'Reasoning', + 'type': 'integer', + }), + }), + 'title': 'OutputTokenDetails', + 'type': 'object', + }), 'StringPromptValue': dict({ 'description': 'String prompt value.', 'properties': dict({ @@ -12825,14 +13418,29 @@ { "input_tokens": 10, "output_tokens": 20, - "total_tokens": 30 + "total_tokens": 30, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + } } ''', 'properties': dict({ + 'input_token_details': dict({ + '$ref': '#/definitions/InputTokenDetails', + }), 'input_tokens': dict({ 'title': 'Input Tokens', 'type': 'integer', }), + 'output_token_details': dict({ + '$ref': '#/definitions/OutputTokenDetails', + }), 'output_tokens': dict({ 'title': 'Output Tokens', 'type': 'integer', From b61aae55d3295e7cc4e764b1a1d0fbf0c5c3e0f4 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 10:25:45 -0700 Subject: [PATCH 04/13] doc nit --- libs/core/langchain_core/messages/ai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 6336cc9b9fb76..07e90b735f04c 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -95,9 +95,9 @@ class UsageMetadata(TypedDict): .. 
code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, From 34a83d9bbb34ff5661b27d13678273b918f18f86 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 10:26:39 -0700 Subject: [PATCH 05/13] snapshot --- .../prompts/__snapshots__/test_chat.ambr | 12 ++--- .../runnables/__snapshots__/test_graph.ambr | 6 +-- .../__snapshots__/test_runnable.ambr | 48 +++++++++---------- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 85513fd122873..72734872c2c29 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -1306,9 +1306,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -2713,9 +2713,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index 13ebdea6171e8..2cd1e6aea7aa4 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1666,9 +1666,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 1df398b7298ad..4f53b88c6bec2 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -3268,9 +3268,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -4752,9 +4752,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -6248,9 +6248,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -7601,9 +7601,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -9127,9 +9127,9 @@ .. 
code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -10525,9 +10525,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -11970,9 +11970,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, @@ -13416,9 +13416,9 @@ .. code-block:: python { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, "input_token_details": { "audio": 10, "cache_creation": 200, From 5dd10ed87df87f1f015b32bac2fb1e5c96d818a6 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 11:23:54 -0700 Subject: [PATCH 06/13] openai[patch]: add usage metadata details --- .../langchain_openai/chat_models/base.py | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 21754937d180f..d2cc2d3883aab 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -63,7 +63,8 @@ ToolMessage, ToolMessageChunk, ) -from langchain_core.messages.ai import UsageMetadata +from langchain_core.messages.ai import UsageMetadata, InputTokenDetails, \ + OutputTokenDetails from langchain_core.messages.tool import tool_call_chunk from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser from langchain_core.output_parsers.openai_tools import ( @@ -284,13 +285,33 @@ def _convert_delta_to_message_chunk( def _convert_chunk_to_generation_chunk( chunk: dict, default_chunk_class: Type, base_generation_info: Optional[Dict] ) -> Optional[ChatGenerationChunk]: - token_usage = chunk.get("usage") + token_usage = chunk.get("usage", {}) choices = chunk.get("choices", []) + + input_tokens = token_usage.get("prompt_tokens", 0) + output_tokens = token_usage.get("completion_tokens", 0) + total_tokens = token_usage.get("total_tokens", input_tokens + output_tokens) + input_token_details = { + "audio": token_usage.get("prompt_tokens_details", {}).get("audio_tokens"), + "cache_read": token_usage.get("prompt_tokens_details", {}).get("cached_tokens"), + } + output_token_details = { + "audio": token_usage.get("completion_tokens_details", {}).get("audio_tokens"), + "reasoning": token_usage.get("completion_token_details", {}).get( + "reasoning_tokens" + ), + } usage_metadata: Optional[UsageMetadata] = ( UsageMetadata( - input_tokens=token_usage.get("prompt_tokens", 0), - output_tokens=token_usage.get("completion_tokens", 0), - total_tokens=token_usage.get("total_tokens", 0), + input_tokens=input_tokens, + output_tokens=output_tokens, + total_tokens=total_tokens, + input_token_details=InputTokenDetails(**{ + k: v for k, v in input_token_details.items() if v is not None + }), + output_token_details=OutputTokenDetails(**{ + k: v for k, v in output_token_details.items() if v is not None + }), ) if token_usage else None From 6833c6d92bf90d4962ba3a77244efe3f3ed1ebe9 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 
11:58:26 -0700 Subject: [PATCH 07/13] wip --- .../langchain_openai/chat_models/base.py | 76 ++++++++++--------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index d2cc2d3883aab..c797f31303940 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -63,8 +63,11 @@ ToolMessage, ToolMessageChunk, ) -from langchain_core.messages.ai import UsageMetadata, InputTokenDetails, \ - OutputTokenDetails +from langchain_core.messages.ai import ( + InputTokenDetails, + OutputTokenDetails, + UsageMetadata, +) from langchain_core.messages.tool import tool_call_chunk from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser from langchain_core.output_parsers.openai_tools import ( @@ -285,38 +288,12 @@ def _convert_delta_to_message_chunk( def _convert_chunk_to_generation_chunk( chunk: dict, default_chunk_class: Type, base_generation_info: Optional[Dict] ) -> Optional[ChatGenerationChunk]: - token_usage = chunk.get("usage", {}) + token_usage = chunk.get("usage") choices = chunk.get("choices", []) - input_tokens = token_usage.get("prompt_tokens", 0) - output_tokens = token_usage.get("completion_tokens", 0) - total_tokens = token_usage.get("total_tokens", input_tokens + output_tokens) - input_token_details = { - "audio": token_usage.get("prompt_tokens_details", {}).get("audio_tokens"), - "cache_read": token_usage.get("prompt_tokens_details", {}).get("cached_tokens"), - } - output_token_details = { - "audio": token_usage.get("completion_tokens_details", {}).get("audio_tokens"), - "reasoning": token_usage.get("completion_token_details", {}).get( - "reasoning_tokens" - ), - } usage_metadata: Optional[UsageMetadata] = ( - UsageMetadata( - input_tokens=input_tokens, - output_tokens=output_tokens, - total_tokens=total_tokens, - input_token_details=InputTokenDetails(**{ - k: v for k, v in input_token_details.items() if v is not None - }), - output_token_details=OutputTokenDetails(**{ - k: v for k, v in output_token_details.items() if v is not None - }), - ) - if token_usage - else None + _create_usage_metadata(token_usage) if token_usage else None ) - if len(choices) == 0: # logprobs is implicitly None generation_chunk = ChatGenerationChunk( @@ -742,15 +719,11 @@ def _create_chat_result( if response_dict.get("error"): raise ValueError(response_dict.get("error")) - token_usage = response_dict.get("usage", {}) + token_usage = response_dict.get("usage") for res in response_dict["choices"]: message = _convert_dict_to_message(res["message"]) if token_usage and isinstance(message, AIMessage): - message.usage_metadata = { - "input_tokens": token_usage.get("prompt_tokens", 0), - "output_tokens": token_usage.get("completion_tokens", 0), - "total_tokens": token_usage.get("total_tokens", 0), - } + message.usage_metadata = _create_usage_metadata(token_usage) generation_info = generation_info or {} generation_info["finish_reason"] = ( res.get("finish_reason") @@ -2181,3 +2154,34 @@ class OpenAIRefusalError(Exception): .. 
versionadded:: 0.1.21 """ + + +def _create_usage_metadata(oai_token_usage: dict) -> UsageMetadata: + input_tokens = oai_token_usage.get("prompt_tokens", 0) + output_tokens = oai_token_usage.get("completion_tokens", 0) + total_tokens = oai_token_usage.get("total_tokens", input_tokens + output_tokens) + input_token_details = { + "audio": oai_token_usage.get("prompt_tokens_details", {}).get("audio_tokens"), + "cache_read": oai_token_usage.get("prompt_tokens_details", {}).get( + "cached_tokens" + ), + } + output_token_details = { + "audio": oai_token_usage.get("completion_tokens_details", {}).get( + "audio_tokens" + ), + "reasoning": oai_token_usage.get("completion_token_details", {}).get( + "reasoning_tokens" + ), + } + return UsageMetadata( + input_tokens=input_tokens, + output_tokens=output_tokens, + total_tokens=total_tokens, + input_token_details=InputTokenDetails( + **{k: v for k, v in input_token_details.items() if v is not None} + ), + output_token_details=OutputTokenDetails( + **{k: v for k, v in output_token_details.items() if v is not None} + ), + ) From ca3a1032be1d4e8e4b01cdc2775bbbcf73ce7ef1 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 12:46:58 -0700 Subject: [PATCH 08/13] fmt --- .../integration_tests/chat_models.py | 65 +++++++++++++++++++ .../unit_tests/chat_models.py | 4 ++ 2 files changed, 69 insertions(+) diff --git a/libs/standard-tests/langchain_standard_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_standard_tests/integration_tests/chat_models.py index c5ac855cdc6a2..1da209095ef40 100644 --- a/libs/standard-tests/langchain_standard_tests/integration_tests/chat_models.py +++ b/libs/standard-tests/langchain_standard_tests/integration_tests/chat_models.py @@ -151,6 +151,31 @@ def test_usage_metadata(self, model: BaseChatModel) -> None: assert isinstance(result.usage_metadata["output_tokens"], int) assert isinstance(result.usage_metadata["total_tokens"], int) + if "audio_input" in self.supported_usage_metadata_details: + msg = self.invoke_with_audio_input() + assert isinstance(msg.usage_metadata["input_token_details"]["audio"], int) # type: ignore[index] + if "audio_output" in self.supported_usage_metadata_details: + msg = self.invoke_with_audio_output() + assert isinstance(msg.usage_metadata["output_token_details"]["audio"], int) # type: ignore[index] + if "reasoning_output" in self.supported_usage_metadata_details: + msg = self.invoke_with_reasoning_output() + assert isinstance( + msg.usage_metadata["output_token_details"]["reasoning"], # type: ignore[index] + int, + ) + if "cache_read_input" in self.supported_usage_metadata_details: + msg = self.invoke_with_cache_read_input() + assert isinstance( + msg.usage_metadata["input_token_details"]["cache_read"], # type: ignore[index] + int, + ) + if "cache_creation_input" in self.supported_usage_metadata_details: + msg = self.invoke_with_cache_creation_input() + assert isinstance( + msg.usage_metadata["input_token_details"]["cache_creation"], # type: ignore[index] + int, + ) + def test_usage_metadata_streaming(self, model: BaseChatModel) -> None: if not self.returns_usage_metadata: pytest.skip("Not implemented.") @@ -164,6 +189,31 @@ def test_usage_metadata_streaming(self, model: BaseChatModel) -> None: assert isinstance(full.usage_metadata["output_tokens"], int) assert isinstance(full.usage_metadata["total_tokens"], int) + if "audio_input" in self.supported_usage_metadata_details: + msg = self.invoke_with_audio_input(stream=True) + assert 
isinstance(msg.usage_metadata["input_token_details"]["audio"], int) # type: ignore[index] + if "audio_output" in self.supported_usage_metadata_details: + msg = self.invoke_with_audio_output(stream=True) + assert isinstance(msg.usage_metadata["output_token_details"]["audio"], int) # type: ignore[index] + if "reasoning_output" in self.supported_usage_metadata_details: + msg = self.invoke_with_reasoning_output(stream=True) + assert isinstance( + msg.usage_metadata["output_token_details"]["reasoning"], # type: ignore[index] + int, + ) + if "cache_read_input" in self.supported_usage_metadata_details: + msg = self.invoke_with_cache_read_input(stream=True) + assert isinstance( + msg.usage_metadata["input_token_details"]["cache_read"], # type: ignore[index] + int, + ) + if "cache_creation_input" in self.supported_usage_metadata_details: + msg = self.invoke_with_cache_creation_input(stream=True) + assert isinstance( + msg.usage_metadata["input_token_details"]["cache_creation"], # type: ignore[index] + int, + ) + def test_stop_sequence(self, model: BaseChatModel) -> None: result = model.invoke("hi", stop=["you"]) assert isinstance(result, AIMessage) @@ -608,3 +658,18 @@ def test_message_with_name(self, model: BaseChatModel) -> None: assert isinstance(result, AIMessage) assert isinstance(result.content, str) assert len(result.content) > 0 + + def invoke_with_audio_input(self, *, stream: bool = False) -> AIMessage: + raise NotImplementedError() + + def invoke_with_audio_output(self, *, stream: bool = False) -> AIMessage: + raise NotImplementedError() + + def invoke_with_reasoning_output(self, *, stream: bool = False) -> AIMessage: + raise NotImplementedError() + + def invoke_with_cache_read_input(self, *, stream: bool = False) -> AIMessage: + raise NotImplementedError() + + def invoke_with_cache_creation_input(self, *, stream: bool = False) -> AIMessage: + raise NotImplementedError() diff --git a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py index 1a611f1800f5c..d3df72166071c 100644 --- a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py +++ b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py @@ -138,6 +138,10 @@ def supports_anthropic_inputs(self) -> bool: def supports_image_tool_message(self) -> bool: return False + @property + def supported_usage_metadata_details(self) -> List[str]: + return [] + class ChatModelUnitTests(ChatModelTests): @property From 78161ad9117a05a16255e37897cb359829d45361 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 13:11:34 -0700 Subject: [PATCH 09/13] fmt --- .../langchain_openai/chat_models/base.py | 6 +-- .../chat_models/test_base_standard.py | 51 ++++++++++++++++++- .../unit_tests/chat_models.py | 12 ++++- 3 files changed, 64 insertions(+), 5 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index c797f31303940..27d4adf06817a 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -2160,17 +2160,17 @@ def _create_usage_metadata(oai_token_usage: dict) -> UsageMetadata: input_tokens = oai_token_usage.get("prompt_tokens", 0) output_tokens = oai_token_usage.get("completion_tokens", 0) total_tokens = oai_token_usage.get("total_tokens", input_tokens + output_tokens) - input_token_details = { + input_token_details: dict = { "audio": 
oai_token_usage.get("prompt_tokens_details", {}).get("audio_tokens"), "cache_read": oai_token_usage.get("prompt_tokens_details", {}).get( "cached_tokens" ), } - output_token_details = { + output_token_details: dict = { "audio": oai_token_usage.get("completion_tokens_details", {}).get( "audio_tokens" ), - "reasoning": oai_token_usage.get("completion_token_details", {}).get( + "reasoning": oai_token_usage.get("completion_tokens_details", {}).get( "reasoning_tokens" ), } diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py index 22b305c9753d4..b91b590ad7cd9 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py @@ -1,12 +1,16 @@ """Standard LangChain interface tests""" -from typing import Type +from pathlib import Path +from typing import List, Literal, Type, cast from langchain_core.language_models import BaseChatModel +from langchain_core.messages import AIMessage from langchain_standard_tests.integration_tests import ChatModelIntegrationTests from langchain_openai import ChatOpenAI +REPO_ROOT_DIR = Path(__file__).parents[6] + class TestOpenAIStandard(ChatModelIntegrationTests): @property @@ -20,3 +24,48 @@ def chat_model_params(self) -> dict: @property def supports_image_inputs(self) -> bool: return True + + @property + def supported_usage_metadata_details( + self, + ) -> List[ + Literal[ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ] + ]: + return ["reasoning_output", "cache_read_input"] + + def invoke_with_cache_read_input(self, *, stream: bool = False) -> AIMessage: + with open(REPO_ROOT_DIR / "README.md", "r") as f: + readme = f.read() + + input_ = f"""What's langchain? 
Here's the langchain README: + + {readme} + """ + llm = ChatOpenAI(model="gpt-4o-mini", stream_usage=True) + _invoke(llm, input_, stream) + # invoke twice so first invocation is cached + return _invoke(llm, input_, stream) + + def invoke_with_reasoning_output(self, *, stream: bool = False) -> AIMessage: + llm = ChatOpenAI(model="o1-mini", stream_usage=True, temperature=1) + input_ = ( + "explain the relationship between the 2008/9 economic crisis and the " + "startup ecosystem in the early 2010s" + ) + return _invoke(llm, input_, stream) + + +def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage: + if stream: + full = None + for chunk in llm.stream(input_): + full = full + chunk if full else chunk # type: ignore[operator] + return cast(AIMessage, full) + else: + return cast(AIMessage, llm.invoke(input_)) diff --git a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py index d3df72166071c..1298c022852cf 100644 --- a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py +++ b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py @@ -139,7 +139,17 @@ def supports_image_tool_message(self) -> bool: return False @property - def supported_usage_metadata_details(self) -> List[str]: + def supported_usage_metadata_details( + self, + ) -> List[ + Literal[ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ] + ]: return [] From 3d4437a502459935b206b1416498c93364f9adf6 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 13:13:41 -0700 Subject: [PATCH 10/13] fmt --- .../unit_tests/chat_models.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py index d3df72166071c..1298c022852cf 100644 --- a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py +++ b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py @@ -139,7 +139,17 @@ def supports_image_tool_message(self) -> bool: return False @property - def supported_usage_metadata_details(self) -> List[str]: + def supported_usage_metadata_details( + self, + ) -> List[ + Literal[ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ] + ]: return [] From 05829ed185937c4d07d9b449c62fa16357df2068 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 13:18:02 -0700 Subject: [PATCH 11/13] docs --- libs/core/langchain_core/messages/ai.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 07e90b735f04c..a86c51b0664c9 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -43,6 +43,8 @@ class InputTokenDetails(TypedDict, total=False): "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 """ audio: int @@ -73,6 +75,8 @@ class OutputTokenDetails(TypedDict, total=False): "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 """ audio: int @@ -108,6 +112,10 @@ class UsageMetadata(TypedDict): "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. """ input_tokens: int @@ -117,7 +125,15 @@ class UsageMetadata(TypedDict): total_tokens: int """Total token count. 
Sum of input_tokens + output_tokens.""" input_token_details: NotRequired[InputTokenDetails] + """Breakdown of input token counts. + + Does *not* need to sum to full input token count. Does *not* need to have all keys. + """ output_token_details: NotRequired[OutputTokenDetails] + """Breakdown of output token counts. + + Does *not* need to sum to full output token count. Does *not* need to have all keys. + """ class AIMessage(BaseMessage): From 2d4b4a9ab8d9d7b8a5a84c699cffefa5cafacc9b Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 13:24:41 -0700 Subject: [PATCH 12/13] fmt --- libs/core/langchain_core/messages/ai.py | 4 ++-- libs/core/pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index a86c51b0664c9..dece1e575eeb6 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -126,12 +126,12 @@ class UsageMetadata(TypedDict): """Total token count. Sum of input_tokens + output_tokens.""" input_token_details: NotRequired[InputTokenDetails] """Breakdown of input token counts. - + Does *not* need to sum to full input token count. Does *not* need to have all keys. """ output_token_details: NotRequired[OutputTokenDetails] """Breakdown of output token counts. - + Does *not* need to sum to full output token count. Does *not* need to have all keys. """ diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 11a2e85aa376c..20dbf792c952d 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -45,7 +45,7 @@ python = ">=3.12.4" [tool.ruff.lint] select = [ "B", "C4", "E", "F", "I", "N", "PIE", "SIM", "T201", "UP", "W",] -ignore = [ "UP007",] +ignore = [ "UP007", 'W293'] [tool.coverage.run] omit = [ "tests/*",] From 4854a0b9331ac780d40985d1aa791b284077e34b Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 3 Oct 2024 13:28:00 -0700 Subject: [PATCH 13/13] snapshot --- .../prompts/__snapshots__/test_chat.ambr | 16 +++++ .../runnables/__snapshots__/test_graph.ambr | 8 +++ .../__snapshots__/test_runnable.ambr | 64 +++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 72734872c2c29..7e35bd6c46548 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -692,6 +692,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -790,6 +792,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -1319,6 +1323,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -2099,6 +2107,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -2197,6 +2207,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -2726,6 +2738,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. 
''', 'properties': dict({ 'input_token_details': dict({ diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index 2cd1e6aea7aa4..ba9d742b37407 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1052,6 +1052,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -1150,6 +1152,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -1679,6 +1683,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 4f53b88c6bec2..85b53552716a2 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -2654,6 +2654,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -2752,6 +2754,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -3281,6 +3285,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -4119,6 +4127,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -4217,6 +4227,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -4765,6 +4777,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -5615,6 +5631,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -5713,6 +5731,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -6261,6 +6281,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -6987,6 +7011,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -7085,6 +7111,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -7614,6 +7642,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -8494,6 +8526,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -8592,6 +8626,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -9140,6 +9176,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -9911,6 +9951,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. 
versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -10009,6 +10051,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -10538,6 +10582,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -11326,6 +11374,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -11424,6 +11474,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -11983,6 +12035,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({ @@ -12783,6 +12839,8 @@ "cache_creation": 200, "cache_read": 100, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -12881,6 +12939,8 @@ "audio": 10, "reasoning": 200, } + + .. versionadded:: 0.3.9 ''', 'properties': dict({ 'audio': dict({ @@ -13429,6 +13489,10 @@ "reasoning": 200, } } + + .. versionchanged:: 0.3.9 + + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ 'input_token_details': dict({