Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SkyServe] Support min_replicas = 0 #2938

Merged
merged 10 commits into from
Jan 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions examples/serve/min_replicas_zero.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# SkyServe YAML to test min_replicas=0 with a simple http server.
# The service will be initialized with no replicas (min_replicas = 0).
# Any traffic to the service will trigger an immediate scale-up.
# The service will be scaled down to 0 replicas when there is no traffic
# for a long time.
#
# Usage:
# sky serve up -n min_replicas examples/serve/min_replicas_zero.yaml
# The endpoint will be printed in the console.
# Querying the endpoint will trigger a scale up.
Comment on lines +9 to +10
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a docstring explaining how to bring the replicas down to 0, i.e. don't send any traffic, and for how long.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. updated


service:
  # Health-check endpoint probed on each replica.
  readiness_probe:
    path: /health
    initial_delay_seconds: 20
  replica_policy:
    # Start with no replicas; incoming traffic triggers a scale-up.
    min_replicas: 0
    max_replicas: 2
    target_qps_per_replica: 1

resources:
  ports: 8081
  cpus: 2+

workdir: examples/serve/http_server

run: python3 server.py
3 changes: 3 additions & 0 deletions sky/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4386,6 +4386,9 @@ def serve_status(all: bool, endpoint: bool, service_names: List[str]):
down. This usually indicates resource leakages. If you see such status,
please login to the cloud console and double-check

- ``NO_REPLICA``: The service has no replicas. This usually happens when
min_replicas is set to 0 and there is no traffic to the system.

Each replica can have one of the following statuses:

- ``PENDING``: The maximum number of simultaneous launches has been reached
Expand Down
10 changes: 9 additions & 1 deletion sky/serve/autoscalers.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _get_desired_num_replicas(self) -> int:
logger.info(f'Requests per second: {num_requests_per_second}, '
f'Current target number of replicas: {target_num_replicas}')

if not self.bootstrap_done:
if not self.bootstrap_done or self.target_num_replicas == 0:
self.bootstrap_done = True
return target_num_replicas
elif target_num_replicas > self.target_num_replicas:
Expand All @@ -173,6 +173,14 @@ def _get_desired_num_replicas(self) -> int:
self.upscale_counter = self.downscale_counter = 0
return self.target_num_replicas

def get_decision_interval(self) -> int:
    """Return the number of seconds to wait before the next scaling decision.

    While the current target is zero replicas (which happens when
    min_replicas = 0 and there is no traffic), the dedicated no-replica
    interval is used instead of the default one, so the autoscaler
    re-evaluates on the reduced interval.
    """
    no_replica_targeted = self.target_num_replicas == 0
    return (constants.AUTOSCALER_NO_REPLICA_DECISION_INTERVAL_SECONDS
            if no_replica_targeted else
            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)

def evaluate_scaling(
self,
replica_infos: List['replica_managers.ReplicaInfo'],
Expand Down
2 changes: 2 additions & 0 deletions sky/serve/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
# Autoscaler scale decision interval in seconds.
# We will try to scale up/down every `decision_interval`.
AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS = 20
# Autoscaler no replica decision interval in seconds.
AUTOSCALER_NO_REPLICA_DECISION_INTERVAL_SECONDS = 5
# Autoscaler default upscale delays in seconds.
# We will upscale only if the target number of instances
# is larger than the current launched instances for delay amount of time.
Expand Down
2 changes: 1 addition & 1 deletion sky/serve/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def _run_autoscaler(self):
f'{common_utils.format_exception(e)}')
with ux_utils.enable_traceback():
logger.error(f' Traceback: {traceback.format_exc()}')
time.sleep(constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
time.sleep(self._autoscaler.get_decision_interval())

def run(self) -> None:

Expand Down
7 changes: 7 additions & 0 deletions sky/serve/serve_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,9 @@ class ServiceStatus(enum.Enum):
# Clean up failed
FAILED_CLEANUP = 'FAILED_CLEANUP'

# No replica
NO_REPLICA = 'NO_REPLICA'

@classmethod
def failed_statuses(cls) -> List['ServiceStatus']:
return [cls.CONTROLLER_FAILED, cls.FAILED_CLEANUP]
Expand All @@ -175,6 +178,9 @@ def from_replica_statuses(
if sum(status2num[status]
for status in ReplicaStatus.failed_statuses()) > 0:
return cls.FAILED
# When min_replicas = 0, there is no (provisioning) replica.
if len(replica_statuses) == 0:
return cls.NO_REPLICA
MaoZiming marked this conversation as resolved.
Show resolved Hide resolved
return cls.REPLICA_INIT


Expand All @@ -186,6 +192,7 @@ def from_replica_statuses(
ServiceStatus.SHUTTING_DOWN: colorama.Fore.YELLOW,
ServiceStatus.FAILED: colorama.Fore.RED,
ServiceStatus.FAILED_CLEANUP: colorama.Fore.RED,
ServiceStatus.NO_REPLICA: colorama.Fore.MAGENTA,
}


Expand Down
4 changes: 2 additions & 2 deletions sky/serve/service_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ def __init__(
qps_upper_threshold: Optional[float] = None,
qps_lower_threshold: Optional[float] = None,
) -> None:
if min_replicas <= 0:
if min_replicas < 0:
with ux_utils.print_exception_no_traceback():
raise ValueError('min_replicas must be greater than 0')
raise ValueError('min_replicas must be greater or equal to 0')
if max_replicas is not None and max_replicas < min_replicas:
with ux_utils.print_exception_no_traceback():
raise ValueError(
Expand Down