Skip to content

Commit

Permalink
[Serve] Fix serve.shutdown() to delete deployments from all applicati…
Browse files Browse the repository at this point in the history
…ons (ray-project#32631)

Previously, `serve.shutdown()` only deleted deployments from the application with `name=""`. This fix makes sure it deletes the deployments from all applications.
  • Loading branch information
zcin authored and peytondmurray committed Mar 22, 2023
1 parent e48894c commit 048dd9d
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 1 deletion.
12 changes: 11 additions & 1 deletion python/ray/serve/_private/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def _wait_for_deployments_shutdown(self, timeout_s: int = 60):
"""
start = time.time()
while time.time() - start < timeout_s:
deployment_statuses = self.get_serve_status().deployment_statuses
deployment_statuses = self.get_all_deployment_statuses()
if len(deployment_statuses) == 0:
break
else:
Expand Down Expand Up @@ -406,6 +406,16 @@ def get_serve_status(self, name: str = SERVE_DEFAULT_APP_NAME) -> StatusOverview
)
return StatusOverview.from_proto(proto)

@_ensure_connected
def get_all_deployment_statuses(self) -> List[DeploymentStatusInfo]:
    """Fetch the status of every deployment reported by the controller.

    Calls the controller's ``get_all_deployment_statuses`` remote method,
    blocks on the result with ``ray.get``, and decodes each returned
    serialized ``DeploymentStatusInfoProto`` message.

    Returns:
        One ``DeploymentStatusInfo`` per serialized status returned by the
        controller, in the order the controller returned them.
    """
    serialized_statuses = ray.get(
        self._controller.get_all_deployment_statuses.remote()
    )
    return [
        DeploymentStatusInfo.from_proto(
            DeploymentStatusInfoProto.FromString(serialized)
        )
        for serialized in serialized_statuses
    ]

@_ensure_connected
def get_handle(
self,
Expand Down
5 changes: 5 additions & 0 deletions python/ray/serve/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,11 @@ def get_app_config(self, name: str = SERVE_DEFAULT_APP_NAME) -> Dict:
.dict(exclude_unset=True)
)

def get_all_deployment_statuses(self) -> List[bytes]:
    """Gets deployment status bytes for all live deployments.

    Returns:
        One serialized status protobuf message per status returned by
        ``deployment_state_manager.get_deployment_statuses()``, in the
        same order.
    """
    statuses = self.deployment_state_manager.get_deployment_statuses()
    return [status.to_proto().SerializeToString() for status in statuses]

def get_deployment_status(self, name: str) -> Union[None, bytes]:
"""Get deployment status by deployment name"""
status = self.deployment_state_manager.get_deployment_statuses([name])
Expand Down
116 changes: 116 additions & 0 deletions python/ray/serve/tests/test_standalone.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,122 @@ def check_dead():
wait_for_condition(check_dead)


def test_v1_shutdown_actors(ray_shutdown):
    """Tests serve.shutdown() works correctly in 1.x case.

    Ensures that after deploying deployments using 1.x API, serve.shutdown()
    deletes all actors (controller, http proxy, all replicas) in the "serve"
    namespace.
    """
    ray.init(num_cpus=16)
    serve.start(http_options=dict(port=8003), detached=True)

    @serve.deployment
    def f():
        pass

    f.deploy()

    expected_actors = {
        "ServeController",
        "HTTPProxyActor",
        "ServeReplica:f",
    }

    def alive_serve_actors():
        # Single source of truth for the query used by both conditions:
        # actors in the Serve namespace whose state is ALIVE.
        return list_actors(
            filters=[("ray_namespace", "=", SERVE_NAMESPACE), ("state", "=", "ALIVE")]
        )

    def check_alive():
        alive_classes = {actor["class_name"] for actor in alive_serve_actors()}
        return alive_classes == expected_actors

    def check_dead():
        return not alive_serve_actors()

    # Wait until exactly the expected actors are up before shutting down, so
    # the post-shutdown check cannot pass on a cluster that never started.
    wait_for_condition(check_alive)
    serve.shutdown()
    wait_for_condition(check_dead)


def test_single_app_shutdown_actors(ray_shutdown):
    """Tests serve.shutdown() works correctly in single-app case.

    Ensures that after deploying a (nameless) app using serve.run(),
    serve.shutdown() deletes all actors (controller, http proxy, all replicas)
    in the "serve" namespace.
    """
    ray.init(num_cpus=16)
    serve.start(http_options=dict(port=8003), detached=True)

    @serve.deployment
    def f():
        pass

    serve.run(f.bind())

    expected_actors = {
        "ServeController",
        "HTTPProxyActor",
        "ServeReplica:f",
    }

    def alive_serve_actors():
        # Single source of truth for the query used by both conditions:
        # actors in the Serve namespace whose state is ALIVE.
        return list_actors(
            filters=[("ray_namespace", "=", SERVE_NAMESPACE), ("state", "=", "ALIVE")]
        )

    def check_alive():
        alive_classes = {actor["class_name"] for actor in alive_serve_actors()}
        return alive_classes == expected_actors

    def check_dead():
        return not alive_serve_actors()

    # Wait until exactly the expected actors are up before shutting down, so
    # the post-shutdown check cannot pass on a cluster that never started.
    wait_for_condition(check_alive)
    serve.shutdown()
    wait_for_condition(check_dead)


def test_multi_app_shutdown_actors(ray_shutdown):
    """Tests serve.shutdown() works correctly in multi-app case.

    Ensures that after deploying multiple distinct applications,
    serve.shutdown() deletes all actors (controller, http proxy, all replicas)
    in the "serve" namespace.
    """
    ray.init(num_cpus=16)
    serve.start(http_options=dict(port=8003), detached=True)

    @serve.deployment
    def f():
        pass

    # Deploy the same deployment under two differently named applications.
    serve.run(f.bind(), name="app1", route_prefix="/app1")
    serve.run(f.bind(), name="app2", route_prefix="/app2")

    expected_actors = {
        "ServeController",
        "HTTPProxyActor",
        "ServeReplica:app1_f",
        "ServeReplica:app2_f",
    }

    def alive_serve_actors():
        # Single source of truth for the query used by both conditions:
        # actors in the Serve namespace whose state is ALIVE.
        return list_actors(
            filters=[("ray_namespace", "=", SERVE_NAMESPACE), ("state", "=", "ALIVE")]
        )

    def check_alive():
        alive_classes = {actor["class_name"] for actor in alive_serve_actors()}
        return alive_classes == expected_actors

    def check_dead():
        return not alive_serve_actors()

    # Wait until exactly the expected actors are up before shutting down, so
    # the post-shutdown check cannot pass on a cluster that never started.
    wait_for_condition(check_alive)
    serve.shutdown()
    wait_for_condition(check_dead)


def test_detached_deployment(ray_cluster):
# https://github.com/ray-project/ray/issues/11437

Expand Down

0 comments on commit 048dd9d

Please sign in to comment.