[Deploy] Unified timeout key.

FedML-AI · Jun 10, 2024 · c29cf1d
1 parent c151831
commit c29cf1d
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 3 deletions.
diff --git a/python/fedml/computing/scheduler/model_scheduler/device_model_cache.py b/python/fedml/computing/scheduler/model_scheduler/device_model_cache.py
@@ -139,7 +139,7 @@ def set_user_setting_replica_num(self, end_point_id,
             "target_queries_per_replica": target_queries_per_replica,
             "aggregation_window_size_seconds": aggregation_window_size_seconds,
             "scale_down_delay_seconds": scale_down_delay_seconds,
-            "request_timeout_sec": timeout_s
+            ServerConstants.INFERENCE_REQUEST_TIMEOUT_KEY: timeout_s
         }
         try:
             self.redis_connection.set(self.get_user_setting_replica_num_key(end_point_id), json.dumps(replica_num_dict))

diff --git a/python/fedml/computing/scheduler/model_scheduler/device_model_inference.py b/python/fedml/computing/scheduler/model_scheduler/device_model_inference.py
@@ -66,7 +66,7 @@ async def auth_middleware(request: Request, call_next):
 
             # Get the request timeout from the endpoint settings.
             request_timeout_s = FEDML_MODEL_CACHE.get_endpoint_settings(end_point_id) \
-                .get("request_timeout_s", ClientConstants.INFERENCE_REQUEST_TIMEOUT)
+                .get(ServerConstants.INFERENCE_REQUEST_TIMEOUT_KEY, ServerConstants.INFERENCE_REQUEST_TIMEOUT_DEFAULT)
 
             # Only proceed if the past k metrics collection is not empty.
             if pask_k_metrics:
@@ -76,7 +76,8 @@ async def auth_middleware(request: Request, call_next):
                 mean_latency = sum(past_k_latencies_sec) / len(past_k_latencies_sec)
 
                 # If timeout threshold is exceeded then cancel and return time out error.
-                if (mean_latency * pending_requests_num) > request_timeout_s:
+                should_block = (mean_latency * pending_requests_num) > request_timeout_s
+                if should_block:
                     return JSONResponse(
                         {"error": True, "message": "Request timed out."},
                         status_code=status.HTTP_504_GATEWAY_TIMEOUT)

diff --git a/python/fedml/computing/scheduler/model_scheduler/device_server_constants.py b/python/fedml/computing/scheduler/model_scheduler/device_server_constants.py
@@ -104,6 +104,9 @@ class ServerConstants(object):
     AUTO_DETECT_PUBLIC_IP = "auto_detect_public_ip"
     MODEL_INFERENCE_DEFAULT_PORT = 2203
     MODEL_CACHE_KEY_EXPIRE_TIME = 1 * 10
+
+    INFERENCE_REQUEST_TIMEOUT_KEY = "request_timeout_sec"
+    INFERENCE_REQUEST_TIMEOUT_DEFAULT = 30
     # -----End-----
 
     MODEL_DEPLOYMENT_STAGE1 = {"index": 1, "text": "ReceivedRequest"}