Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Core][Serve] Hide execute api from sky.execution #3058

Merged
merged 2 commits into from
Feb 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions sky/backends/backend_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from sky import exceptions
from sky import global_user_state
from sky import provision as provision_lib
from sky import serve as serve_lib
from sky import sky_logging
from sky import skypilot_config
from sky import status_lib
Expand Down Expand Up @@ -2525,7 +2526,7 @@ def get_task_demands_dict(task: 'task_lib.Task') -> Dict[str, float]:
# For sky serve controller task, we set the CPU resource to a smaller
# value to support a larger number of services.
resources_dict = {
'CPU': (constants.SERVICES_TASK_CPU_DEMAND
'CPU': (serve_lib.SERVICES_TASK_CPU_DEMAND
if task.service_name is not None else DEFAULT_TASK_CPU_DEMAND)
}
if task.best_resources is not None:
Expand All @@ -2546,7 +2547,7 @@ def get_task_resources_str(task: 'task_lib.Task') -> str:
The resources string is used for display purposes only, so we show only
the accelerator demands (if any). Otherwise, the CPU demand is shown.
"""
task_cpu_demand = (constants.SERVICES_TASK_CPU_DEMAND if task.service_name
task_cpu_demand = (serve_lib.SERVICES_TASK_CPU_DEMAND if task.service_name
is not None else DEFAULT_TASK_CPU_DEMAND)
if task.best_resources is not None:
accelerator_dict = task.best_resources.accelerators
Expand Down
14 changes: 8 additions & 6 deletions sky/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def _maybe_clone_disk_from_cluster(clone_disk_from: Optional[str],
return task


def execute(
def _execute(
entrypoint: Union['sky.Task', 'sky.Dag'],
dryrun: bool = False,
down: bool = False,
Expand Down Expand Up @@ -402,6 +402,7 @@ def launch(
# pylint: disable=invalid-name
_is_launched_by_spot_controller: bool = False,
_is_launched_by_sky_serve_controller: bool = False,
_disable_controller_check: bool = False,
) -> Tuple[Optional[int], Optional[backends.ResourceHandle]]:
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
"""Launch a cluster or task.
Expand Down Expand Up @@ -490,10 +491,11 @@ def launch(
if dryrun.
"""
entrypoint = task
controller_utils.check_cluster_name_not_controller(
cluster_name, operation_str='sky.launch')
if not _disable_controller_check:
controller_utils.check_cluster_name_not_controller(
cluster_name, operation_str='sky.launch')

return execute(
return _execute(
entrypoint=entrypoint,
dryrun=dryrun,
down=down,
Expand Down Expand Up @@ -590,7 +592,7 @@ def exec( # pylint: disable=redefined-builtin
operation='executing tasks',
check_cloud_vm_ray_backend=False,
dryrun=dryrun)
return execute(
return _execute(
entrypoint=entrypoint,
dryrun=dryrun,
down=down,
Expand Down Expand Up @@ -701,7 +703,7 @@ def spot_launch(
f'Launching managed spot job {dag.name!r} from spot controller...'
f'{colorama.Style.RESET_ALL}')
print('Launching spot controller...')
execute(
_execute(
entrypoint=controller_task,
stream_logs=stream_logs,
cluster_name=controller_name,
Expand Down
1 change: 1 addition & 0 deletions sky/serve/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from sky.serve.constants import ENDPOINT_PROBE_INTERVAL_SECONDS
from sky.serve.constants import INITIAL_VERSION
from sky.serve.constants import LB_CONTROLLER_SYNC_INTERVAL_SECONDS
from sky.serve.constants import SERVICES_TASK_CPU_DEMAND
from sky.serve.constants import SKYSERVE_METADATA_DIR
from sky.serve.core import down
from sky.serve.core import status
Expand Down
4 changes: 4 additions & 0 deletions sky/serve/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@
# do some log rotation.
CONTROLLER_RESOURCES = {'cpus': '4+', 'disk_size': 200}

# A default controller with 4 vCPU and 16 GB memory can run up to 16 services.
SERVICES_MEMORY_USAGE_GB = 1.0
SERVICES_TASK_CPU_DEMAND = 0.25

# A period of time to initialize your service. Any readiness probe failures
# during this period will be ignored.
DEFAULT_INITIAL_DELAY_SECONDS = 1200
Expand Down
9 changes: 3 additions & 6 deletions sky/serve/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,6 @@ def up(
task: sky.Task to serve up.
service_name: Name of the service.
"""
# This is to avoid circular import.
# pylint: disable=import-outside-toplevel
from sky import execution

if service_name is None:
service_name = serve_utils.generate_service_name()

Expand Down Expand Up @@ -163,14 +159,15 @@ def up(
# whether the service is already running. If the id is the same
# as the current job id, we know the service is up and running
# for the first time; otherwise it is a name conflict.
controller_job_id, controller_handle = execution.execute(
entrypoint=controller_task,
controller_job_id, controller_handle = sky.launch(
task=controller_task,
stream_logs=False,
cluster_name=controller_name,
detach_run=True,
idle_minutes_to_autostop=constants.
CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP,
retry_until_up=True,
_disable_controller_check=True,
)

style = colorama.Style
Expand Down
4 changes: 1 addition & 3 deletions sky/serve/serve_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from sky import status_lib
from sky.serve import constants
from sky.serve import serve_state
from sky.skylet import constants as skylet_constants
from sky.skylet import job_lib
from sky.utils import common_utils
from sky.utils import log_utils
Expand All @@ -38,8 +37,7 @@
SKY_SERVE_CONTROLLER_NAME: str = (
f'sky-serve-controller-{common_utils.get_user_hash()}')
_SYSTEM_MEMORY_GB = psutil.virtual_memory().total // (1024**3)
NUM_SERVICE_THRESHOLD = (_SYSTEM_MEMORY_GB //
skylet_constants.SERVICES_MEMORY_USAGE_GB)
NUM_SERVICE_THRESHOLD = _SYSTEM_MEMORY_GB // constants.SERVICES_MEMORY_USAGE_GB
_CONTROLLER_URL = 'http://localhost:{CONTROLLER_PORT}'

_SKYPILOT_PROVISION_LOG_PATTERN = r'.*tail -n100 -f (.*provision\.log).*'
Expand Down
6 changes: 0 additions & 6 deletions sky/skylet/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@
# Port on the remote spot controller that the dashboard is running on.
SPOT_DASHBOARD_REMOTE_PORT = 5000

# A default controller with 4 vCPU and 16 GB memory can run up to 16 services.
# TODO(tian): This is to fix circular imports. Move this back to
# sky.serve.constants.
SERVICES_MEMORY_USAGE_GB = 1.0
SERVICES_TASK_CPU_DEMAND = 0.25

# Docker default options
DEFAULT_DOCKER_CONTAINER_NAME = 'sky_container'
DEFAULT_DOCKER_PORT = 10022
Expand Down
Loading