From 9098a1b5672992c633a562bfaac69e35a0a96f3c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 31 Jan 2025 11:48:39 -0800 Subject: [PATCH 01/10] working streaming usage tracking --- litellm/litellm_core_utils/litellm_logging.py | 5 +- .../litellm_core_utils/streaming_handler.py | 52 ++------ .../test_token_counting.py | 121 ++++++++++++++++++ 3 files changed, 132 insertions(+), 46 deletions(-) create mode 100644 tests/logging_callback_tests/test_token_counting.py diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 7c5638c94564..9b753d710707 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -1544,8 +1544,7 @@ async def async_success_handler( # noqa: PLR0915 Union[ModelResponse, TextCompletionResponse] ] = None if self.stream is True and ( - isinstance(result, litellm.ModelResponse) - or isinstance(result, litellm.ModelResponseStream) + isinstance(result, litellm.ModelResponseStream) or isinstance(result, TextCompletionResponse) ): complete_streaming_response: Optional[ @@ -1558,6 +1557,8 @@ async def async_success_handler( # noqa: PLR0915 streaming_chunks=self.streaming_chunks, is_async=True, ) + if self.stream is True and isinstance(result, ModelResponse): + complete_streaming_response = result if complete_streaming_response is not None: print_verbose("Async success callbacks: Got a complete streaming response") diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py index 597b03ac29a4..9cac5e0c8d2e 100644 --- a/litellm/litellm_core_utils/streaming_handler.py +++ b/litellm/litellm_core_utils/streaming_handler.py @@ -1568,21 +1568,6 @@ async def __anext__(self): # noqa: PLR0915 ) if processed_chunk is None: continue - ## LOGGING - ## LOGGING - executor.submit( - self.logging_obj.success_handler, - result=processed_chunk, - start_time=None, - end_time=None, - cache_hit=cache_hit, - ) - - asyncio.create_task( - self.logging_obj.async_success_handler( - processed_chunk, cache_hit=cache_hit - ) - ) if self.logging_obj._llm_caching_handler is not None: asyncio.create_task( @@ -1634,16 +1619,6 @@ async def __anext__(self): # noqa: PLR0915 ) if processed_chunk is None: continue - ## LOGGING - threading.Thread( - target=self.logging_obj.success_handler, - args=(processed_chunk, None, None, cache_hit), - ).start() # log processed_chunk - asyncio.create_task( - self.logging_obj.async_success_handler( - processed_chunk, cache_hit=cache_hit - ) - ) choice = processed_chunk.choices[0] if isinstance(choice, StreamingChoices): @@ -1671,33 +1646,22 @@ async def __anext__(self): # noqa: PLR0915 "usage", getattr(complete_streaming_response, "usage"), ) - ## LOGGING - threading.Thread( - target=self.logging_obj.success_handler, - args=(response, None, None, cache_hit), - ).start() # log response + if self.sent_stream_usage is False and self.send_stream_usage is True: + self.sent_stream_usage = True + return response + asyncio.create_task( self.logging_obj.async_success_handler( - response, cache_hit=cache_hit + complete_streaming_response, + cache_hit=cache_hit, + start_time=None, + end_time=None, ) ) - if self.sent_stream_usage is False and self.send_stream_usage is True: - self.sent_stream_usage = True - return response raise StopAsyncIteration # Re-raise StopIteration else: self.sent_last_chunk = True processed_chunk = self.finish_reason_handler() - ## LOGGING - threading.Thread( - target=self.logging_obj.success_handler, - 
args=(processed_chunk, None, None, cache_hit), - ).start() # log response - asyncio.create_task( - self.logging_obj.async_success_handler( - processed_chunk, cache_hit=cache_hit - ) - ) return processed_chunk except httpx.TimeoutException as e: # if httpx read timeout error occues traceback_exception = traceback.format_exc() diff --git a/tests/logging_callback_tests/test_token_counting.py b/tests/logging_callback_tests/test_token_counting.py new file mode 100644 index 000000000000..d27cf4bf8c42 --- /dev/null +++ b/tests/logging_callback_tests/test_token_counting.py @@ -0,0 +1,121 @@ +import os +import sys +import traceback +import uuid +import pytest +from dotenv import load_dotenv +from fastapi import Request +from fastapi.routing import APIRoute + +load_dotenv() +import io +import os +import time +import json + +# this file is to test litellm/proxy + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import litellm +import asyncio +from typing import Optional +from litellm.types.utils import StandardLoggingPayload, Usage +from litellm.integrations.custom_logger import CustomLogger + + +class TestCustomLogger(CustomLogger): + def __init__(self): + self.recorded_usage: Optional[Usage] = None + + async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): + standard_logging_payload = kwargs.get("standard_logging_object") + print( + "standard_logging_payload", + json.dumps(standard_logging_payload, indent=4, default=str), + ) + + self.recorded_usage = Usage( + prompt_tokens=standard_logging_payload.get("prompt_tokens"), + completion_tokens=standard_logging_payload.get("completion_tokens"), + total_tokens=standard_logging_payload.get("total_tokens"), + ) + pass + + +@pytest.mark.asyncio +async def test_stream_token_counting_gpt_4o(): + """ + When stream_options={"include_usage": True} logging callback tracks Usage == Usage from llm API + """ + custom_logger = TestCustomLogger() + litellm.logging_callback_manager.add_litellm_callback(custom_logger) + + response = await litellm.acompletion( + model="gpt-4o", + messages=[{"role": "user", "content": "Hello, how are you?" * 100}], + stream=True, + stream_options={"include_usage": True}, + ) + + actual_usage = None + async for chunk in response: + if "usage" in chunk: + actual_usage = chunk["usage"] + print("chunk.usage", json.dumps(chunk["usage"], indent=4, default=str)) + pass + + await asyncio.sleep(2) + + print("\n\n\n\n\n") + print( + "recorded_usage", + json.dumps(custom_logger.recorded_usage, indent=4, default=str), + ) + print("\n\n\n\n\n") + + assert actual_usage.prompt_tokens == custom_logger.recorded_usage.prompt_tokens + assert ( + actual_usage.completion_tokens == custom_logger.recorded_usage.completion_tokens + ) + assert actual_usage.total_tokens == custom_logger.recorded_usage.total_tokens + + +@pytest.mark.asyncio +async def test_stream_token_counting_without_include_usage(): + """ + When stream_options={"include_usage": True} is not passed, the usage tracked == usage from llm api chunk + + by default, litellm passes `include_usage=True` for OpenAI API + """ + custom_logger = TestCustomLogger() + litellm.logging_callback_manager.add_litellm_callback(custom_logger) + + response = await litellm.acompletion( + model="gpt-4o", + messages=[{"role": "user", "content": "Hello, how are you?" 
* 100}], + stream=True, + ) + + actual_usage = None + async for chunk in response: + if "usage" in chunk: + actual_usage = chunk["usage"] + print("chunk.usage", json.dumps(chunk["usage"], indent=4, default=str)) + pass + + await asyncio.sleep(2) + + print("\n\n\n\n\n") + print( + "recorded_usage", + json.dumps(custom_logger.recorded_usage, indent=4, default=str), + ) + print("\n\n\n\n\n") + + assert actual_usage.prompt_tokens == custom_logger.recorded_usage.prompt_tokens + assert ( + actual_usage.completion_tokens == custom_logger.recorded_usage.completion_tokens + ) + assert actual_usage.total_tokens == custom_logger.recorded_usage.total_tokens From 4e13d32e6dccf6f9509de66d2d76c11f0a618873 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 31 Jan 2025 11:58:46 -0800 Subject: [PATCH 02/10] fix test_async_chat_openai_stream_options --- tests/local_testing/test_custom_callback_input.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/local_testing/test_custom_callback_input.py b/tests/local_testing/test_custom_callback_input.py index 8343b63c9dac..64b6ea53561c 100644 --- a/tests/local_testing/test_custom_callback_input.py +++ b/tests/local_testing/test_custom_callback_input.py @@ -540,6 +540,8 @@ async def test_async_chat_openai_stream_options(): async for chunk in response: continue + + await asyncio.sleep(1) print("mock client args list=", mock_client.await_args_list) mock_client.assert_awaited_once() except Exception as e: From bfc73d3e6aca28b7c9d9e6e8b38b938df6adac9c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 31 Jan 2025 12:02:09 -0800 Subject: [PATCH 03/10] fix await asyncio.sleep(1) --- .../test_custom_callback_input.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/local_testing/test_custom_callback_input.py b/tests/local_testing/test_custom_callback_input.py index 64b6ea53561c..034ff7b9b4f3 100644 --- a/tests/local_testing/test_custom_callback_input.py +++ b/tests/local_testing/test_custom_callback_input.py @@ -418,6 +418,8 @@ async def test_async_chat_openai_stream(): ) async for chunk in response: continue + + await asyncio.sleep(1) ## test failure callback try: response = await litellm.acompletion( @@ -428,6 +430,7 @@ async def test_async_chat_openai_stream(): ) async for chunk in response: continue + await asyncio.sleep(1) except Exception: pass time.sleep(1) @@ -499,6 +502,8 @@ async def test_async_chat_azure_stream(): ) async for chunk in response: continue + + await asyncio.sleep(1) # test failure callback try: response = await litellm.acompletion( @@ -509,6 +514,7 @@ async def test_async_chat_azure_stream(): ) async for chunk in response: continue + await asyncio.sleep(1) except Exception: pass await asyncio.sleep(1) @@ -609,6 +615,8 @@ async def test_async_chat_bedrock_stream(): async for chunk in response: print(f"chunk: {chunk}") continue + + await asyncio.sleep(1) ## test failure callback try: response = await litellm.acompletion( @@ -619,6 +627,8 @@ async def test_async_chat_bedrock_stream(): ) async for chunk in response: continue + + await asyncio.sleep(1) except Exception: pass await asyncio.sleep(1) @@ -772,6 +782,8 @@ async def test_async_text_completion_bedrock(): async for chunk in response: print(f"chunk: {chunk}") continue + + await asyncio.sleep(1) ## test failure callback try: response = await litellm.atext_completion( @@ -782,6 +794,8 @@ async def test_async_text_completion_bedrock(): ) async for chunk in response: continue + + await asyncio.sleep(1) except Exception: pass time.sleep(1) @@ -811,6 +825,8 @@ 
async def test_async_text_completion_openai_stream(): async for chunk in response: print(f"chunk: {chunk}") continue + + await asyncio.sleep(1) ## test failure callback try: response = await litellm.atext_completion( @@ -821,6 +837,8 @@ async def test_async_text_completion_openai_stream(): ) async for chunk in response: continue + + await asyncio.sleep(1) except Exception: pass time.sleep(1) From 6516afc196bc9adeb2652af6fbe58a37ffc13a60 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 31 Jan 2025 13:12:02 -0800 Subject: [PATCH 04/10] test_async_chat_azure --- tests/local_testing/test_custom_callback_router.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/local_testing/test_custom_callback_router.py b/tests/local_testing/test_custom_callback_router.py index 2234690101a1..310a49792262 100644 --- a/tests/local_testing/test_custom_callback_router.py +++ b/tests/local_testing/test_custom_callback_router.py @@ -381,7 +381,7 @@ async def async_log_failure_event(self, kwargs, response_obj, start_time, end_ti # Simple Azure OpenAI call ## COMPLETION -@pytest.mark.flaky(retries=5, delay=1) +# @pytest.mark.flaky(retries=5, delay=1) @pytest.mark.asyncio async def test_async_chat_azure(): try: @@ -427,11 +427,11 @@ async def test_async_chat_azure(): async for chunk in response: print(f"async azure router chunk: {chunk}") continue - await asyncio.sleep(1) + await asyncio.sleep(2) print(f"customHandler.states: {customHandler_streaming_azure_router.states}") assert len(customHandler_streaming_azure_router.errors) == 0 assert ( - len(customHandler_streaming_azure_router.states) >= 4 + len(customHandler_streaming_azure_router.states) >= 3 ) # pre, post, stream (multiple times), success # failure model_list = [ From ff848fa639dc8ca2eecf718538d346b5d070ee00 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 31 Jan 2025 13:27:40 -0800 Subject: [PATCH 05/10] fix s3 logging --- litellm/litellm_core_utils/litellm_logging.py | 72 ++++++++++--------- .../litellm_core_utils/streaming_handler.py | 15 ++-- .../thread_pool_executor.py | 5 ++ litellm/utils.py | 6 +- 4 files changed, 56 insertions(+), 42 deletions(-) create mode 100644 litellm/litellm_core_utils/thread_pool_executor.py diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 9b753d710707..45b63177b974 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -1029,21 +1029,13 @@ def success_handler( # noqa: PLR0915 ] = None if "complete_streaming_response" in self.model_call_details: return # break out of this. 
- if self.stream and ( - isinstance(result, litellm.ModelResponse) - or isinstance(result, TextCompletionResponse) - or isinstance(result, ModelResponseStream) - ): - complete_streaming_response: Optional[ - Union[ModelResponse, TextCompletionResponse] - ] = _assemble_complete_response_from_streaming_chunks( - result=result, - start_time=start_time, - end_time=end_time, - request_kwargs=self.model_call_details, - streaming_chunks=self.sync_streaming_chunks, - is_async=False, - ) + complete_streaming_response = self._get_assembled_streaming_response( + result=result, + start_time=start_time, + end_time=end_time, + is_async=False, + streaming_chunks=self.sync_streaming_chunks, + ) if complete_streaming_response is not None: verbose_logger.debug( "Logging Details LiteLLM-Success Call streaming complete" @@ -1542,23 +1534,13 @@ async def async_success_handler( # noqa: PLR0915 return # break out of this. complete_streaming_response: Optional[ Union[ModelResponse, TextCompletionResponse] - ] = None - if self.stream is True and ( - isinstance(result, litellm.ModelResponseStream) - or isinstance(result, TextCompletionResponse) - ): - complete_streaming_response: Optional[ - Union[ModelResponse, TextCompletionResponse] - ] = _assemble_complete_response_from_streaming_chunks( - result=result, - start_time=start_time, - end_time=end_time, - request_kwargs=self.model_call_details, - streaming_chunks=self.streaming_chunks, - is_async=True, - ) - if self.stream is True and isinstance(result, ModelResponse): - complete_streaming_response = result + ] = self._get_assembled_streaming_response( + result=result, + start_time=start_time, + end_time=end_time, + is_async=True, + streaming_chunks=self.streaming_chunks, + ) if complete_streaming_response is not None: print_verbose("Async success callbacks: Got a complete streaming response") @@ -2260,6 +2242,32 @@ def _remove_internal_custom_logger_callbacks(self, callbacks: List) -> List: _new_callbacks.append(_c) return _new_callbacks + def _get_assembled_streaming_response( + self, + result: Union[ModelResponse, TextCompletionResponse, ModelResponseStream, Any], + start_time: datetime.datetime, + end_time: datetime.datetime, + is_async: bool, + streaming_chunks: List[Any], + ) -> Optional[Union[ModelResponse, TextCompletionResponse]]: + if isinstance(result, ModelResponse): + return result + elif isinstance(result, TextCompletionResponse): + return result + elif isinstance(result, ModelResponseStream): + complete_streaming_response: Optional[ + Union[ModelResponse, TextCompletionResponse] + ] = _assemble_complete_response_from_streaming_chunks( + result=result, + start_time=start_time, + end_time=end_time, + request_kwargs=self.model_call_details, + streaming_chunks=streaming_chunks, + is_async=is_async, + ) + return complete_streaming_response + return None + def set_callbacks(callback_list, function_id=None): # noqa: PLR0915 """ diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py index 9cac5e0c8d2e..1ebef1d1847c 100644 --- a/litellm/litellm_core_utils/streaming_handler.py +++ b/litellm/litellm_core_utils/streaming_handler.py @@ -14,6 +14,7 @@ import litellm from litellm import verbose_logger from litellm.litellm_core_utils.redact_messages import LiteLLMLoggingObject +from litellm.litellm_core_utils.thread_pool_executor import executor from litellm.types.utils import Delta from litellm.types.utils import GenericStreamingChunk as GChunk from litellm.types.utils import ( @@ -29,11 +30,6 @@ from 
.llm_response_utils.get_api_base import get_api_base from .rules import Rules -MAX_THREADS = 100 - -# Create a ThreadPoolExecutor -executor = ThreadPoolExecutor(max_workers=MAX_THREADS) - def is_async_iterable(obj: Any) -> bool: """ @@ -1658,6 +1654,15 @@ async def __anext__(self): # noqa: PLR0915 end_time=None, ) ) + + executor.submit( + self.logging_obj.success_handler, + complete_streaming_response, + cache_hit=cache_hit, + start_time=None, + end_time=None, + ) + raise StopAsyncIteration # Re-raise StopIteration else: self.sent_last_chunk = True diff --git a/litellm/litellm_core_utils/thread_pool_executor.py b/litellm/litellm_core_utils/thread_pool_executor.py new file mode 100644 index 000000000000..b7c630b20d80 --- /dev/null +++ b/litellm/litellm_core_utils/thread_pool_executor.py @@ -0,0 +1,5 @@ +from concurrent.futures import ThreadPoolExecutor + +MAX_THREADS = 100 +# Create a ThreadPoolExecutor +executor = ThreadPoolExecutor(max_workers=MAX_THREADS) diff --git a/litellm/utils.py b/litellm/utils.py index 5396e008f0d8..0a69c861bd40 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -166,7 +166,6 @@ # Convert to str (if necessary) claude_json_str = json.dumps(json_data) import importlib.metadata -from concurrent.futures import ThreadPoolExecutor from typing import ( TYPE_CHECKING, Any, @@ -185,6 +184,7 @@ from openai import OpenAIError as OriginalError +from litellm.litellm_core_utils.thread_pool_executor import executor from litellm.llms.base_llm.audio_transcription.transformation import ( BaseAudioTranscriptionConfig, ) @@ -235,10 +235,6 @@ ####### ENVIRONMENT VARIABLES #################### # Adjust to your specific application needs / system capabilities. -MAX_THREADS = 100 - -# Create a ThreadPoolExecutor -executor = ThreadPoolExecutor(max_workers=MAX_THREADS) sentry_sdk_instance = None capture_exception = None add_breadcrumb = None From 545feeeb85b0e6912254323f9ccc86404a950351 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 31 Jan 2025 13:38:15 -0800 Subject: [PATCH 06/10] fix get_stream_options --- litellm/llms/openai/openai.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index aa361422fe8a..382bcc8394b7 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -833,8 +833,9 @@ def streaming( stream_options: Optional[dict] = None, ): data["stream"] = True - if stream_options is not None: - data["stream_options"] = stream_options + data.update( + self.get_stream_options(stream_options=stream_options, api_base=api_base) + ) openai_client: OpenAI = self._get_openai_client( # type: ignore is_async=False, @@ -893,8 +894,9 @@ async def async_streaming( ): response = None data["stream"] = True - if stream_options is not None: - data["stream_options"] = stream_options + data.update( + self.get_stream_options(stream_options=stream_options, api_base=api_base) + ) for _ in range(2): try: openai_aclient: AsyncOpenAI = self._get_openai_client( # type: ignore @@ -977,6 +979,17 @@ async def async_streaming( status_code=500, message=f"{str(e)}", headers=error_headers ) + def get_stream_options( + self, stream_options: Optional[dict], api_base: Optional[str] + ) -> dict: + if stream_options is not None: + return {"stream_options": stream_options} + else: + # by default litellm will include usage for openai endpoints + if api_base is None or "api.openai.com" in api_base: + return {"stream_options": {"include_usage": True}} + return {} + # Embedding 
@track_llm_api_timing() async def make_openai_embedding_request( From 5cc1a333ba52514b8cabf4b8414562b3636b6812 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 31 Jan 2025 13:39:33 -0800 Subject: [PATCH 07/10] fix get_stream_options --- litellm/llms/openai/openai.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index 382bcc8394b7..4aa00dfd16fa 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -982,6 +982,9 @@ async def async_streaming( def get_stream_options( self, stream_options: Optional[dict], api_base: Optional[str] ) -> dict: + """ + Pass `stream_options` to the data dict for OpenAI requests + """ if stream_options is not None: return {"stream_options": stream_options} else: From a84d1899f2fc3d5666dea6717e0f0f7c10b8e224 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 31 Jan 2025 13:54:09 -0800 Subject: [PATCH 08/10] fix streaming handler --- litellm/litellm_core_utils/streaming_handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py index 1ebef1d1847c..08356fea73aa 100644 --- a/litellm/litellm_core_utils/streaming_handler.py +++ b/litellm/litellm_core_utils/streaming_handler.py @@ -5,7 +5,6 @@ import time import traceback import uuid -from concurrent.futures import ThreadPoolExecutor from typing import Any, Callable, Dict, List, Optional, cast import httpx From 9edc33521b31e144f59789b0f602812c2fafdaf2 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 31 Jan 2025 13:59:45 -0800 Subject: [PATCH 09/10] test_stream_token_counting_with_redaction --- .../test_token_counting.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/logging_callback_tests/test_token_counting.py b/tests/logging_callback_tests/test_token_counting.py index d27cf4bf8c42..bce938a67049 100644 --- a/tests/logging_callback_tests/test_token_counting.py +++ b/tests/logging_callback_tests/test_token_counting.py @@ -119,3 +119,41 @@ async def test_stream_token_counting_without_include_usage(): actual_usage.completion_tokens == custom_logger.recorded_usage.completion_tokens ) assert actual_usage.total_tokens == custom_logger.recorded_usage.total_tokens + + +@pytest.mark.asyncio +async def test_stream_token_counting_with_redaction(): + """ + When litellm.turn_off_message_logging=True is used, the usage tracked == usage from llm api chunk + """ + litellm.turn_off_message_logging = True + custom_logger = TestCustomLogger() + litellm.logging_callback_manager.add_litellm_callback(custom_logger) + + response = await litellm.acompletion( + model="gpt-4o", + messages=[{"role": "user", "content": "Hello, how are you?" 
* 100}], + stream=True, + ) + + actual_usage = None + async for chunk in response: + if "usage" in chunk: + actual_usage = chunk["usage"] + print("chunk.usage", json.dumps(chunk["usage"], indent=4, default=str)) + pass + + await asyncio.sleep(2) + + print("\n\n\n\n\n") + print( + "recorded_usage", + json.dumps(custom_logger.recorded_usage, indent=4, default=str), + ) + print("\n\n\n\n\n") + + assert actual_usage.prompt_tokens == custom_logger.recorded_usage.prompt_tokens + assert ( + actual_usage.completion_tokens == custom_logger.recorded_usage.completion_tokens + ) + assert actual_usage.total_tokens == custom_logger.recorded_usage.total_tokens From ffab6190bdc6693195f9521141081d3ea9453544 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 31 Jan 2025 14:54:32 -0800 Subject: [PATCH 10/10] fix codeql concern --- litellm/llms/openai/openai.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index 4aa00dfd16fa..eb095661a833 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -14,6 +14,7 @@ Union, cast, ) +from urllib.parse import urlparse import httpx import openai @@ -989,7 +990,7 @@ def get_stream_options( return {"stream_options": stream_options} else: # by default litellm will include usage for openai endpoints - if api_base is None or "api.openai.com" in api_base: + if api_base is None or urlparse(api_base).hostname == "api.openai.com": return {"stream_options": {"include_usage": True}} return {}
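
Note on the final patch ("fix codeql concern"): the flagged pattern is incomplete URL substring sanitization — checking whether "api.openai.com" appears anywhere in api_base also matches hosts that merely embed that string, so the check has to compare the parsed hostname instead. A minimal sketch of the difference, using hypothetical URLs that are not taken from the patches:

    from urllib.parse import urlparse

    def is_openai_api_base(api_base: str) -> bool:
        # Compare the parsed hostname rather than doing a substring check,
        # mirroring the change to litellm/llms/openai/openai.py above.
        return urlparse(api_base).hostname == "api.openai.com"

    print(is_openai_api_base("https://api.openai.com/v1"))             # True
    print("api.openai.com" in "https://api.openai.com.evil.example")   # True  -> substring check is too loose
    print(is_openai_api_base("https://api.openai.com.evil.example"))   # False -> hostname comparison rejects it

With the old substring check, a caller-supplied api_base like the hypothetical "https://api.openai.com.evil.example" would have been treated as the official OpenAI endpoint and silently had stream_options={"include_usage": True} injected; comparing urlparse(api_base).hostname restricts that default to api.openai.com itself.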