Skip to content

Commit

Permalink
[Deploy] Unified timeout key.
Browse files Browse the repository at this point in the history
  • Loading branch information
Raphael-Jin committed Jun 10, 2024
1 parent c151831 commit c29cf1d
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def set_user_setting_replica_num(self, end_point_id,
"target_queries_per_replica": target_queries_per_replica,
"aggregation_window_size_seconds": aggregation_window_size_seconds,
"scale_down_delay_seconds": scale_down_delay_seconds,
"request_timeout_sec": timeout_s
ServerConstants.INFERENCE_REQUEST_TIMEOUT_KEY: timeout_s
}
try:
self.redis_connection.set(self.get_user_setting_replica_num_key(end_point_id), json.dumps(replica_num_dict))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ async def auth_middleware(request: Request, call_next):

# Get the request timeout from the endpoint settings.
request_timeout_s = FEDML_MODEL_CACHE.get_endpoint_settings(end_point_id) \
.get("request_timeout_s", ClientConstants.INFERENCE_REQUEST_TIMEOUT)
.get(ServerConstants.INFERENCE_REQUEST_TIMEOUT_KEY, ServerConstants.INFERENCE_REQUEST_TIMEOUT_DEFAULT)

# Only proceed if the past k metrics collection is not empty.
if pask_k_metrics:
Expand All @@ -76,7 +76,8 @@ async def auth_middleware(request: Request, call_next):
mean_latency = sum(past_k_latencies_sec) / len(past_k_latencies_sec)

# If the estimated wait (mean latency * pending requests) exceeds the request timeout, reject the request with a timeout error.
if (mean_latency * pending_requests_num) > request_timeout_s:
should_block = (mean_latency * pending_requests_num) > request_timeout_s
if should_block:
return JSONResponse(
{"error": True, "message": "Request timed out."},
status_code=status.HTTP_504_GATEWAY_TIMEOUT)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ class ServerConstants(object):
AUTO_DETECT_PUBLIC_IP = "auto_detect_public_ip"
MODEL_INFERENCE_DEFAULT_PORT = 2203
MODEL_CACHE_KEY_EXPIRE_TIME = 1 * 10

INFERENCE_REQUEST_TIMEOUT_KEY = "request_timeout_sec"
INFERENCE_REQUEST_TIMEOUT_DEFAULT = 30
# -----End-----

MODEL_DEPLOYMENT_STAGE1 = {"index": 1, "text": "ReceivedRequest"}
Expand Down

0 comments on commit c29cf1d

Please sign in to comment.