Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Improve retry mechanism for _get_ai_reply and refactor method #2113

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 45 additions & 44 deletions letta/agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import inspect
import time
import traceback
import warnings
from abc import ABC, abstractmethod
Expand Down Expand Up @@ -522,60 +523,60 @@ def _get_ai_reply(
self,
message_sequence: List[Message],
function_call: str = "auto",
first_message: bool = False, # hint
first_message: bool = False,
stream: bool = False, # TODO move to config?
fail_on_empty_response: bool = False,
empty_response_retry_limit: int = 3,
backoff_factor: float = 0.5, # delay multiplier for exponential backoff
max_delay: float = 10.0, # max delay between retries
) -> ChatCompletionResponse:
"""Get response from LLM API"""
# Get the allowed tools based on the ToolRulesSolver state
"""Get response from LLM API with robust retry mechanism."""

allowed_tool_names = self.tool_rules_solver.get_allowed_tool_names()
allowed_functions = (
self.functions if not allowed_tool_names else [func for func in self.functions if func["name"] in allowed_tool_names]
)

if not allowed_tool_names:
# if it's empty, any available tools are fair game
allowed_functions = self.functions
else:
allowed_functions = [func for func in self.functions if func["name"] in allowed_tool_names]
for attempt in range(1, empty_response_retry_limit + 1):
try:
response = create(
llm_config=self.agent_state.llm_config,
messages=message_sequence,
user_id=self.agent_state.user_id,
functions=allowed_functions,
functions_python=self.functions_python,
function_call=function_call,
first_message=first_message,
stream=stream,
stream_interface=self.interface,
)

try:
response = create(
# agent_state=self.agent_state,
llm_config=self.agent_state.llm_config,
messages=message_sequence,
user_id=self.agent_state.user_id,
functions=allowed_functions,
functions_python=self.functions_python,
function_call=function_call,
# hint
first_message=first_message,
# streaming
stream=stream,
stream_interface=self.interface,
)
# These bottom two are retryable
if len(response.choices) == 0 or response.choices[0] is None:
raise ValueError(f"API call returned an empty message: {response}")

if len(response.choices) == 0 or response.choices[0] is None:
empty_api_err_message = f"API call didn't return a message: {response}"
if fail_on_empty_response or empty_response_retry_limit == 0:
raise Exception(empty_api_err_message)
else:
# Decrement retry limit and try again
warnings.warn(empty_api_err_message)
return self._get_ai_reply(
message_sequence, function_call, first_message, stream, fail_on_empty_response, empty_response_retry_limit - 1
)
if response.choices[0].finish_reason not in ["stop", "function_call", "tool_calls"]:
if response.choices[0].finish_reason == "length":
# This is not retryable, hence RuntimeError vs. ValueError
raise RuntimeError("Finish reason was length (maximum context length)")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this RuntimeError get caught by the summarizer?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cpacker

It seems to pass the summarizer tests, though.

else:
raise ValueError(f"Bad finish reason from API: {response.choices[0].finish_reason}")

return response

# special case for 'length'
if response.choices[0].finish_reason == "length":
raise Exception("Finish reason was length (maximum context length)")
except ValueError as ve:
if attempt >= empty_response_retry_limit:
warnings.warn(f"Retry limit reached. Final error: {ve}")
break
else:
delay = min(backoff_factor * (2 ** (attempt - 1)), max_delay)
warnings.warn(f"Attempt {attempt} failed: {ve}. Retrying in {delay} seconds...")
time.sleep(delay)

# catches for soft errors
if response.choices[0].finish_reason not in ["stop", "function_call", "tool_calls"]:
raise Exception(f"API call finish with bad finish reason: {response}")
except Exception as e:
# For non-retryable errors, exit immediately
raise e

# unpack with response.choices[0].message.content
return response
except Exception as e:
raise e
raise Exception("Retries exhausted and no valid response received.")

def _handle_ai_response(
self,
Expand Down
Loading