Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Deploy] Use terminology expose_subdomains. #2214

Merged
merged 1 commit into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Required
workspace: "./" # We will package all the files in the workspace directory
enable_serverless_container: true # Identify whether to use serverless container
expose_subdomains: true # For customized image, if you want to route all the subdomains, set to true. e.g. localhost:2345/{all-subdomain}
inference_image_name: "" # Container image name
container_run_command: "" # str or list, similar to CMD in the Dockerfile
port: 80 # Service port, currently you can only indicate one arbitrary port
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
workspace: "./"

enable_serverless_container: true
expose_subdomains: true
inference_image_name: "fedml/llama3-8b-tensorrtllm"

# If you put the model repository in $workspace/model_repository, it will be mounted to /home/fedml/models_serving/model_repository
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
workspace: "./"

enable_serverless_container: true
expose_subdomains: true
inference_image_name: "nvcr.io/nvidia/tritonserver:24.05-py3"

volumes:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ class ClientConstants(object):
DEPLOY_TIMEOUT_SEC_KEY = "deploy_timeout_sec"
DEPLOY_TIMEOUT_SEC_DEFAULT = 600

ENABLE_SERVERLESS_CONTAINER_KEY = "enable_serverless_container"
EXPOSE_SUBDOMAINS_KEY = "expose_subdomains"

CUSTOMIZED_VOLUMES_MOUNT_KEY = "volumes"
CUSTOMIZED_VOLUMES_PATH_FROM_WORKSPACE_KEY = "workspace_path"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def start_deployment(end_point_id, end_point_name, model_id, model_version,
registry_user_name, registry_user_password = parse_image_registry_related_config(config)

# Service app related
dst_bootstrap_dir, dst_model_serving_dir, relative_entry_fedml_format, enable_serverless_container, \
dst_bootstrap_dir, dst_model_serving_dir, relative_entry_fedml_format, expose_subdomains, \
customized_image_entry_cmd, customized_readiness_check, customized_liveliness_check, customized_uri = \
handle_container_service_app(config, model_storage_local_path)

Expand Down Expand Up @@ -255,7 +255,7 @@ def start_deployment(end_point_id, end_point_name, model_id, model_version,
model_metadata = ret_model_metadata
model_metadata["liveliness_check"] = customized_liveliness_check
model_metadata["readiness_check"] = customized_readiness_check
model_metadata[ClientConstants.ENABLE_SERVERLESS_CONTAINER_KEY] = enable_serverless_container
model_metadata[ClientConstants.EXPOSE_SUBDOMAINS_KEY] = expose_subdomains
logging.info(f"[Worker][Replica{replica_rank}] Model deployment is successful with inference_output_url: "
f"{inference_output_url}, model_metadata: {model_metadata}, model_config: {ret_model_config}")

Expand Down Expand Up @@ -616,13 +616,13 @@ def handle_container_service_app(config, model_storage_local_path):
relative_entry_fedml_format = config.get('entry_point', "")

# User indicate either fedml format python main entry filename or entry command
enable_serverless_container = config.get(ClientConstants.ENABLE_SERVERLESS_CONTAINER_KEY, False)
expose_subdomains = config.get(ClientConstants.EXPOSE_SUBDOMAINS_KEY, False)
customized_image_entry_cmd = config.get('container_run_command', None) # Could be str or list
customized_readiness_check = config.get('readiness_probe', ClientConstants.READINESS_PROBE_DEFAULT)
customized_liveliness_check = config.get('liveness_probe', ClientConstants.LIVENESS_PROBE_DEFAULT)
customized_uri = config.get(ClientConstants.CUSTOMIZED_SERVICE_KEY, "")

return (dst_bootstrap_dir, dst_model_serving_dir, relative_entry_fedml_format, enable_serverless_container,
return (dst_bootstrap_dir, dst_model_serving_dir, relative_entry_fedml_format, expose_subdomains,
customized_image_entry_cmd, customized_readiness_check, customized_liveliness_check, customized_uri)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ class ServerConstants(object):
DEVICE_DIFF_REPLACE_OPERATION = "op: replace"

# Worker config yaml related
ENABLE_SERVERLESS_CONTAINER_KEY = "enable_serverless_container"
EXPOSE_SUBDOMAINS_KEY = "expose_subdomains"

@staticmethod
def get_fedml_home_dir():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@ def construct_final_gateway_url(self, end_point_id):
if self.deployed_replica_payload is not None:
payload_json = self.deployed_replica_payload
enable_custom_path = payload_json["model_metadata"].get(
ServerConstants.ENABLE_SERVERLESS_CONTAINER_KEY, False)
ServerConstants.EXPOSE_SUBDOMAINS_KEY, False)
if enable_custom_path:
identifier = "custom_inference"

Expand Down
Loading