diff --git a/sdks/python/src/opik/integrations/llama_index/callback.py b/sdks/python/src/opik/integrations/llama_index/callback.py
index 5706ebe6bf..8b7aa4a9b3 100644
--- a/sdks/python/src/opik/integrations/llama_index/callback.py
+++ b/sdks/python/src/opik/integrations/llama_index/callback.py
@@ -156,6 +156,10 @@ def on_event_end(
         # Log the output to the span with the matching id
         if event_id in self._map_event_id_to_span_data:
             span_data = self._map_event_id_to_span_data[event_id]
+
+            llm_usage_info = event_parsing_utils.get_usage_data(payload)
+            span_data.update(**llm_usage_info.__dict__)
+
             span_data.update(output=span_output).init_end_time()
 
             self._opik_client.span(**span_data.__dict__)
diff --git a/sdks/python/src/opik/integrations/llama_index/event_parsing_utils.py b/sdks/python/src/opik/integrations/llama_index/event_parsing_utils.py
index 3ad94ce9f2..8edc7e158e 100644
--- a/sdks/python/src/opik/integrations/llama_index/event_parsing_utils.py
+++ b/sdks/python/src/opik/integrations/llama_index/event_parsing_utils.py
@@ -1,6 +1,19 @@
-from typing import Optional, Dict, Any
+import dataclasses
+from typing import Any, Dict, Optional
+
+from llama_index.core import Settings
+from llama_index.core.base.llms.types import ChatResponse
 from llama_index.core.callbacks import schema as llama_index_schema
 
+from opik.types import UsageDict
+
+
+@dataclasses.dataclass
+class LLMUsageInfo:
+    provider: Optional[str] = None
+    model: Optional[str] = None
+    usage: Optional[UsageDict] = None
+
 
 def get_span_input_from_events(
     event_type: llama_index_schema.CBEventType, payload: Optional[Dict[str, Any]]
@@ -109,3 +122,36 @@ def get_span_output_from_event(
         return {"output": payload_copy}
     else:
         return None
+
+
+def get_usage_data(
+    payload: Optional[Dict[str, Any]],
+) -> LLMUsageInfo:
+    llm_usage_info = LLMUsageInfo()
+
+    if payload is None or len(payload) == 0:
+        return llm_usage_info
+
+    # The comment for LLAMAIndex version 0.12.8:
+    # Here we manually parse token usage info for OpenAI only (and we could do so for other providers),
+    # although we could try to use TokenCountingHandler.
+    # However, TokenCountingHandler currently also supports only OpenAI models.
+
+    if "openai" not in Settings.llm.class_name().lower():
+        return llm_usage_info
+
+    response: Optional[ChatResponse] = payload.get(
+        llama_index_schema.EventPayload.RESPONSE
+    )
+
+    if response and hasattr(response, "raw"):
+        if hasattr(response.raw, "model"):
+            llm_usage_info.model = response.raw.model
+            llm_usage_info.provider = "openai"
+        if hasattr(response.raw, "usage"):
+            usage_info = response.raw.usage.model_dump()
+            usage_info.pop("completion_tokens_details", None)
+            usage_info.pop("prompt_tokens_details", None)
+            llm_usage_info.usage = usage_info
+
+    return llm_usage_info
diff --git a/sdks/python/tests/library_integration/llama_index/test_llama_index.py b/sdks/python/tests/library_integration/llama_index/test_llama_index.py
index b0c8fafc12..4068e126eb 100644
--- a/sdks/python/tests/library_integration/llama_index/test_llama_index.py
+++ b/sdks/python/tests/library_integration/llama_index/test_llama_index.py
@@ -7,8 +7,7 @@
 from opik.config import OPIK_PROJECT_DEFAULT_NAME
 from opik.integrations.llama_index import LlamaIndexCallbackHandler
 
-
-from ...testlib import ANY_BUT_NONE, TraceModel, assert_equal
+from ...testlib import ANY_BUT_NONE, TraceModel, assert_dict_has_keys, assert_equal
 
 
 @pytest.fixture
@@ -89,3 +88,9 @@ def test_llama_index__happyflow(
     assert len(fake_backend.trace_trees) == 2
 
     assert_equal(EXPECTED_TRACE_TREES, fake_backend.trace_trees)
+
+    # check token usage info
+    llm_response = fake_backend.trace_trees[1].spans[0].spans[1].spans[3].usage
+    assert_dict_has_keys(
+        llm_response, ["completion_tokens", "prompt_tokens", "total_tokens"]
+    )