diff --git a/python/examples/deploy/custom_inference_image/template.yaml b/python/examples/deploy/custom_inference_image/template.yaml
index 10e6580bc..1dd13e153 100644
--- a/python/examples/deploy/custom_inference_image/template.yaml
+++ b/python/examples/deploy/custom_inference_image/template.yaml
@@ -1,6 +1,6 @@
 # Required
 workspace: "./" # We will pacakge all the files in the workspace directory
-enable_serverless_container: true # Identify whether to use serverless container
+expose_subdomains: true # For customized image, if you want to route all the subdomains, set to true. e.g. localhost:2345/{all-subdomain}
 inference_image_name: "" # Container image name
 container_run_command: "" # str or list, similar to CMD in the dockerfile
 port: 80 # Service port, currently you can only indicate one arbitrary port
diff --git a/python/examples/deploy/custom_inference_image/tensorrt_llm/tensorrtllm.yaml b/python/examples/deploy/custom_inference_image/tensorrt_llm/tensorrtllm.yaml
index d41dba798..a72c1f775 100644
--- a/python/examples/deploy/custom_inference_image/tensorrt_llm/tensorrtllm.yaml
+++ b/python/examples/deploy/custom_inference_image/tensorrt_llm/tensorrtllm.yaml
@@ -1,6 +1,6 @@
 workspace: "./"
-enable_serverless_container: true
+expose_subdomains: true
 inference_image_name: "fedml/llama3-8b-tensorrtllm"
 
 # If you put the model repository in $workspace/model_repository, it will be mounted to /home/fedml/models_serving/model_repository
diff --git a/python/examples/deploy/custom_inference_image/triton_inference_server/template/custom_inference_image.yaml b/python/examples/deploy/custom_inference_image/triton_inference_server/template/custom_inference_image.yaml
index eb02e3904..11ae9f82f 100644
--- a/python/examples/deploy/custom_inference_image/triton_inference_server/template/custom_inference_image.yaml
+++ b/python/examples/deploy/custom_inference_image/triton_inference_server/template/custom_inference_image.yaml
@@ -1,6 +1,6 @@
 workspace: "./"
-enable_serverless_container: true
+expose_subdomains: true
 inference_image_name: "nvcr.io/nvidia/tritonserver:24.05-py3"
 
 volumes:
diff --git a/python/fedml/computing/scheduler/model_scheduler/device_client_constants.py b/python/fedml/computing/scheduler/model_scheduler/device_client_constants.py
index 4006e5072..3bb2e12ae 100644
--- a/python/fedml/computing/scheduler/model_scheduler/device_client_constants.py
+++ b/python/fedml/computing/scheduler/model_scheduler/device_client_constants.py
@@ -159,7 +159,7 @@ class ClientConstants(object):
     DEPLOY_TIMEOUT_SEC_KEY = "deploy_timeout_sec"
     DEPLOY_TIMEOUT_SEC_DEFAULT = 600
 
-    ENABLE_SERVERLESS_CONTAINER_KEY = "enable_serverless_container"
+    EXPOSE_SUBDOMAINS_KEY = "expose_subdomains"
     CUSTOMIZED_VOLUMES_MOUNT_KEY = "volumes"
     CUSTOMIZED_VOLUMES_PATH_FROM_WORKSPACE_KEY = "workspace_path"
diff --git a/python/fedml/computing/scheduler/model_scheduler/device_model_deployment.py b/python/fedml/computing/scheduler/model_scheduler/device_model_deployment.py
index 665bb4082..6a637653f 100755
--- a/python/fedml/computing/scheduler/model_scheduler/device_model_deployment.py
+++ b/python/fedml/computing/scheduler/model_scheduler/device_model_deployment.py
@@ -88,7 +88,7 @@ def start_deployment(end_point_id, end_point_name, model_id, model_version,
         registry_user_name, registry_user_password = parse_image_registry_related_config(config)
 
     # Service app related
-    dst_bootstrap_dir, dst_model_serving_dir, relative_entry_fedml_format, enable_serverless_container, \
+    dst_bootstrap_dir, dst_model_serving_dir, relative_entry_fedml_format, expose_subdomains, \
         customized_image_entry_cmd, customized_readiness_check, customized_liveliness_check, customized_uri = \
         handle_container_service_app(config, model_storage_local_path)
@@ -255,7 +255,7 @@ def start_deployment(end_point_id, end_point_name, model_id, model_version,
         model_metadata = ret_model_metadata
         model_metadata["liveliness_check"] = customized_liveliness_check
         model_metadata["readiness_check"] = customized_readiness_check
-        model_metadata[ClientConstants.ENABLE_SERVERLESS_CONTAINER_KEY] = enable_serverless_container
+        model_metadata[ClientConstants.EXPOSE_SUBDOMAINS_KEY] = expose_subdomains
 
         logging.info(f"[Worker][Replica{replica_rank}] Model deployment is successful with inference_output_url: "
                      f"{inference_output_url}, model_metadata: {model_metadata}, model_config: {ret_model_config}")
@@ -616,13 +616,13 @@ def handle_container_service_app(config, model_storage_local_path):
     relative_entry_fedml_format = config.get('entry_point', "")
 
     # User indicate either fedml format python main entry filename or entry command
-    enable_serverless_container = config.get(ClientConstants.ENABLE_SERVERLESS_CONTAINER_KEY, False)
+    expose_subdomains = config.get(ClientConstants.EXPOSE_SUBDOMAINS_KEY, False)
     customized_image_entry_cmd = config.get('container_run_command', None)  # Could be str or list
     customized_readiness_check = config.get('readiness_probe', ClientConstants.READINESS_PROBE_DEFAULT)
     customized_liveliness_check = config.get('liveness_probe', ClientConstants.LIVENESS_PROBE_DEFAULT)
     customized_uri = config.get(ClientConstants.CUSTOMIZED_SERVICE_KEY, "")
 
-    return (dst_bootstrap_dir, dst_model_serving_dir, relative_entry_fedml_format, enable_serverless_container,
+    return (dst_bootstrap_dir, dst_model_serving_dir, relative_entry_fedml_format, expose_subdomains,
             customized_image_entry_cmd, customized_readiness_check, customized_liveliness_check, customized_uri)
diff --git a/python/fedml/computing/scheduler/model_scheduler/device_server_constants.py b/python/fedml/computing/scheduler/model_scheduler/device_server_constants.py
index 00f0fe73b..44eaeb937 100644
--- a/python/fedml/computing/scheduler/model_scheduler/device_server_constants.py
+++ b/python/fedml/computing/scheduler/model_scheduler/device_server_constants.py
@@ -151,7 +151,7 @@ class ServerConstants(object):
     DEVICE_DIFF_REPLACE_OPERATION = "op: replace"
 
     # Worker comfig yaml related
-    ENABLE_SERVERLESS_CONTAINER_KEY = "enable_serverless_container"
+    EXPOSE_SUBDOMAINS_KEY = "expose_subdomains"
 
     @staticmethod
     def get_fedml_home_dir():
diff --git a/python/fedml/computing/scheduler/model_scheduler/master_job_runner.py b/python/fedml/computing/scheduler/model_scheduler/master_job_runner.py
index ab6bc4c89..00b08acfb 100755
--- a/python/fedml/computing/scheduler/model_scheduler/master_job_runner.py
+++ b/python/fedml/computing/scheduler/model_scheduler/master_job_runner.py
@@ -763,7 +763,7 @@ def construct_final_gateway_url(self, end_point_id):
         if self.deployed_replica_payload is not None:
             payload_json = self.deployed_replica_payload
             enable_custom_path = payload_json["model_metadata"].get(
-                ServerConstants.ENABLE_SERVERLESS_CONTAINER_KEY, False)
+                ServerConstants.EXPOSE_SUBDOMAINS_KEY, False)
             if enable_custom_path:
                 identifier = "custom_inference"
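
Note: this changeset renames the config key outright, so a worker config that still says `enable_serverless_container: true` would silently resolve to the `False` default after the rename. Below is a minimal hypothetical sketch (not part of this diff, and not FedML's actual behavior) of how a key-rename could be read with a legacy fallback; the helper name `get_expose_subdomains` is invented for illustration.

```python
# Hypothetical backward-compatibility sketch for the key rename.
# Only the key strings themselves are taken from this diff.
from typing import Any, Dict

EXPOSE_SUBDOMAINS_KEY = "expose_subdomains"  # new key introduced by this diff
LEGACY_SERVERLESS_CONTAINER_KEY = "enable_serverless_container"  # key removed by this diff


def get_expose_subdomains(config: Dict[str, Any]) -> bool:
    """Prefer the new key; fall back to the legacy key if only it is present."""
    if EXPOSE_SUBDOMAINS_KEY in config:
        return bool(config[EXPOSE_SUBDOMAINS_KEY])
    return bool(config.get(LEGACY_SERVERLESS_CONTAINER_KEY, False))


# A config written against the old template still resolves correctly:
assert get_expose_subdomains({"enable_serverless_container": True}) is True
# The new key wins when both are given:
assert get_expose_subdomains({"expose_subdomains": False,
                              "enable_serverless_container": True}) is False
# Neither key present falls back to the default, matching config.get(..., False):
assert get_expose_subdomains({}) is False
```

Whether such a fallback is wanted is a design choice: keeping it eases migration for existing worker YAMLs, while dropping it (as this diff does) keeps only one canonical key name.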