Skip to content

Commit

Permalink
Retry for replicate completion response of status=processing (#7901)
Browse files Browse the repository at this point in the history
We use the DEFAULT_REPLICATE_POLLING_RETRIES and
DEFAULT_REPLICATE_POLLING_DELAY_SECONDS constants for the retry count and
initial delay. If the completion response returns status=processing, we
loop to retry.

Fixes #7900

Signed-off-by: BJ Hargrave <[email protected]>
  • Loading branch information
bjhargrave authored Jan 24, 2025
1 parent c6e9240 commit d98f132
Showing 1 changed file with 16 additions and 6 deletions.
22 changes: 16 additions & 6 deletions litellm/llms/replicate/chat/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,16 @@ def completion(
)
return CustomStreamWrapper(_response, model, logging_obj=logging_obj, custom_llm_provider="replicate") # type: ignore
else:
for _ in range(litellm.DEFAULT_MAX_RETRIES):
for retry in range(litellm.DEFAULT_REPLICATE_POLLING_RETRIES):
time.sleep(
1
) # wait 1s to allow response to be generated by replicate - else partial output is generated with status=="processing"
litellm.DEFAULT_REPLICATE_POLLING_DELAY_SECONDS + 2 * retry
) # wait to allow response to be generated by replicate - else partial output is generated with status=="processing"
response = httpx_client.get(url=prediction_url, headers=headers)
if (
response.status_code == 200
and response.json().get("status") == "processing"
):
continue
return litellm.ReplicateConfig().transform_response(
model=model,
raw_response=response,
Expand Down Expand Up @@ -259,11 +264,16 @@ async def async_completion(
)
return CustomStreamWrapper(_response, model, logging_obj=logging_obj, custom_llm_provider="replicate") # type: ignore

for _ in range(litellm.DEFAULT_REPLICATE_POLLING_RETRIES):
for retry in range(litellm.DEFAULT_REPLICATE_POLLING_RETRIES):
await asyncio.sleep(
litellm.DEFAULT_REPLICATE_POLLING_DELAY_SECONDS
) # wait 1s to allow response to be generated by replicate - else partial output is generated with status=="processing"
litellm.DEFAULT_REPLICATE_POLLING_DELAY_SECONDS + 2 * retry
) # wait to allow response to be generated by replicate - else partial output is generated with status=="processing"
response = await async_handler.get(url=prediction_url, headers=headers)
if (
response.status_code == 200
and response.json().get("status") == "processing"
):
continue
return litellm.ReplicateConfig().transform_response(
model=model,
raw_response=response,
Expand Down

0 comments on commit d98f132

Please sign in to comment.