Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core[patch]: add UsageMetadata details #27072

Merged
merged 10 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 74 additions & 7 deletions libs/core/langchain_core/messages/ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Any, Literal, Optional, Union

from pydantic import model_validator
from typing_extensions import Self, TypedDict
from typing_extensions import NotRequired, Self, TypedDict

from langchain_core.messages.base import (
BaseMessage,
Expand All @@ -29,6 +29,62 @@
from langchain_core.utils.json import parse_partial_json


class InputTokenDetails(TypedDict, total=False):
    """Breakdown of input token counts.

    Does *not* need to sum to full input token count. Does *not* need to have all keys.

    Example:

        .. code-block:: python

            {
                "audio": 10,
                "cache_creation": 200,
                "cache_read": 100,
            }
    """

    # total=False: every key below is optional, so providers only report the
    # categories they actually track.
    audio: int
    """Audio input tokens."""
    cache_creation: int
    """Input tokens written to the cache after a cache miss.

    Since there was a cache miss, the cache was created from these tokens.
    """
    cache_read: int
    """Input tokens served from the cache after a cache hit.

    Since there was a cache hit, the tokens were read from the cache. More precisely,
    the model state given these tokens was read from the cache.
    """


class OutputTokenDetails(TypedDict, total=False):
    """Breakdown of output token counts.

    Does *not* need to sum to full output token count. Does *not* need to have all keys.

    Example:

        .. code-block:: python

            {
                "audio": 10,
                "reasoning": 200,
            }
    """

    # total=False: every key below is optional, so providers only report the
    # categories they actually track.
    audio: int
    """Audio output tokens."""
    reasoning: int
    """Reasoning output tokens.

    Tokens generated by the model in a chain of thought process (e.g. by OpenAI's o1
    models) that are not returned as part of model output.
    """


class UsageMetadata(TypedDict):
"""Usage metadata for a message, such as token counts.

Expand All @@ -39,18 +95,29 @@ class UsageMetadata(TypedDict):
.. code-block:: python

{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
"""

input_tokens: int
"""Count of input (or prompt) tokens."""
"""Count of input (or prompt) tokens. Sum of all input token types."""
output_tokens: int
"""Count of output (or completion) tokens."""
"""Count of output (or completion) tokens. Sum of all output token types."""
total_tokens: int
"""Total token count."""
"""Total token count. Sum of input_tokens + output_tokens."""
input_token_details: NotRequired[InputTokenDetails]
baskaryan marked this conversation as resolved.
Show resolved Hide resolved
output_token_details: NotRequired[OutputTokenDetails]


class AIMessage(BaseMessage):
Expand Down
156 changes: 154 additions & 2 deletions libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,39 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.

Does *not* need to sum to full input token count. Does *not* need to have all keys.

Example:

.. code-block:: python

{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
Expand Down Expand Up @@ -743,6 +776,34 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.

Does *not* need to sum to full output token count. Does *not* need to have all keys.

Example:

.. code-block:: python

{
"audio": 10,
"reasoning": 200,
}
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'SystemMessage': dict({
'additionalProperties': True,
'description': '''
Expand Down Expand Up @@ -1247,14 +1308,29 @@
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"total_tokens": 30,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/$defs/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/$defs/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',
Expand Down Expand Up @@ -2008,6 +2084,39 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.

Does *not* need to sum to full input token count. Does *not* need to have all keys.

Example:

.. code-block:: python

{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
Expand Down Expand Up @@ -2074,6 +2183,34 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.

Does *not* need to sum to full output token count. Does *not* need to have all keys.

Example:

.. code-block:: python

{
"audio": 10,
"reasoning": 200,
}
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'SystemMessage': dict({
'additionalProperties': True,
'description': '''
Expand Down Expand Up @@ -2578,14 +2715,29 @@
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"total_tokens": 30,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/$defs/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/$defs/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',
Expand Down
Loading
Loading