From e13c39104cc3fca974e2afa207bcca24817f4e17 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 24 Sep 2024 23:20:27 -0700 Subject: [PATCH] [k8s] Autodown Serve controller on Kubernetes (#3984) * Add autodown for skyserve on k8s * lint --- sky/backends/cloud_vm_ray_backend.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 191a09438aa..0831bad65fb 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -4147,11 +4147,21 @@ def set_autostop(self, idle_minutes_to_autostop >= 0): # We should hit this code path only for the controllers on # Kubernetes and RunPod clusters. - assert (controller_utils.Controllers.from_name( - handle.cluster_name) is not None), handle.cluster_name - logger.info('Auto-stop is not supported for Kubernetes ' - 'and RunPod clusters. Skipping.') - return + controller = controller_utils.Controllers.from_name( + handle.cluster_name) + assert (controller is not None), handle.cluster_name + if (controller + == controller_utils.Controllers.SKY_SERVE_CONTROLLER and + isinstance(handle.launched_resources.cloud, + clouds.Kubernetes)): + # For SkyServe controllers on Kubernetes: override autostop + # behavior to force autodown (instead of no-op) + # to avoid dangling controllers. + down = True + else: + logger.info('Auto-stop is not supported for Kubernetes ' + 'and RunPod clusters. Skipping.') + return # Check if we're stopping spot assert (handle.launched_resources is not None and