Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SkyServe] Support min_replicas = 0 #2938

Merged
merged 10 commits into from
Jan 6, 2024
3 changes: 3 additions & 0 deletions sky/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4386,6 +4386,9 @@ def serve_status(all: bool, endpoint: bool, service_names: List[str]):
down. This usually indicates resource leakages. If you see such status,
please login to the cloud console and double-check

- ``NO_REPLICA``: The service has no replicas. This usually happens when
min_replicas is set to 0.
MaoZiming marked this conversation as resolved.
Show resolved Hide resolved

Each replica can have one of the following statuses:

- ``PENDING``: The maximum number of simultaneous launches has been reached
Expand Down
2 changes: 1 addition & 1 deletion sky/serve/autoscalers.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _get_desired_num_replicas(self) -> int:
logger.info(f'Requests per second: {num_requests_per_second}, '
f'Current target number of replicas: {target_num_replicas}')

if not self.bootstrap_done:
if (not self.bootstrap_done or self.target_num_replicas == 0):
MaoZiming marked this conversation as resolved.
Show resolved Hide resolved
self.bootstrap_done = True
return target_num_replicas
elif target_num_replicas > self.target_num_replicas:
Expand Down
2 changes: 2 additions & 0 deletions sky/serve/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
# Autoscaler scale decision interval in seconds.
# We will try to scale up/down every `decision_interval`.
AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS = 20
# Autoscaler decision interval in seconds used while the target number of
# replicas is 0.
AUTOSCALER_NO_REPLICA_DECISION_INTERVAL_SECONDS = 5
# Autoscaler default upscale delays in seconds.
# We will upscale only if the target number of instances
# is larger than the current launched instances for delay amount of time.
Expand Down
7 changes: 6 additions & 1 deletion sky/serve/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,12 @@ def _run_autoscaler(self):
f'{common_utils.format_exception(e)}')
with ux_utils.enable_traceback():
logger.error(f' Traceback: {traceback.format_exc()}')
time.sleep(constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
if self._autoscaler.target_num_replicas == 0:
time.sleep(
constants.AUTOSCALER_NO_REPLICA_DECISION_INTERVAL_SECONDS)
else:
time.sleep(
constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
MaoZiming marked this conversation as resolved.
Show resolved Hide resolved

def run(self) -> None:

Expand Down
6 changes: 6 additions & 0 deletions sky/serve/serve_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,9 @@ class ServiceStatus(enum.Enum):
# Clean up failed
FAILED_CLEANUP = 'FAILED_CLEANUP'

# No replica
NO_REPLICA = 'NO_REPLICA'

@classmethod
def failed_statuses(cls) -> List['ServiceStatus']:
    """Return the failed service statuses.

    Returns:
        A new list holding ``CONTROLLER_FAILED`` and ``FAILED_CLEANUP``,
        in that order.
    """
    failed = [cls.CONTROLLER_FAILED, cls.FAILED_CLEANUP]
    return failed
Expand All @@ -175,6 +178,8 @@ def from_replica_statuses(
if sum(status2num[status]
for status in ReplicaStatus.failed_statuses()) > 0:
return cls.FAILED
if len(replica_statuses) == 0:
return cls.NO_REPLICA
MaoZiming marked this conversation as resolved.
Show resolved Hide resolved
return cls.REPLICA_INIT


Expand All @@ -186,6 +191,7 @@ def from_replica_statuses(
ServiceStatus.SHUTTING_DOWN: colorama.Fore.YELLOW,
ServiceStatus.FAILED: colorama.Fore.RED,
ServiceStatus.FAILED_CLEANUP: colorama.Fore.RED,
ServiceStatus.NO_REPLICA: colorama.Fore.MAGENTA,
}


Expand Down
4 changes: 2 additions & 2 deletions sky/serve/service_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ def __init__(
qps_upper_threshold: Optional[float] = None,
qps_lower_threshold: Optional[float] = None,
) -> None:
if min_replicas <= 0:
if min_replicas < 0:
with ux_utils.print_exception_no_traceback():
raise ValueError('min_replicas must be greater than 0')
raise ValueError('min_replicas must be greater or equal to 0')
if max_replicas is not None and max_replicas < min_replicas:
with ux_utils.print_exception_no_traceback():
raise ValueError(
Expand Down