Skip to content

Commit

Permalink
Add ux_utils.print_exception_no_traceback() for cleaner error output
Browse files: browse the repository at this point in the history
Fixes skypilot-org#4096

Add `ux_utils.print_exception_no_traceback()` to error handling blocks in `sky/serve/serve_utils.py` to simplify error messages and reduce traceback clutter.

* Add `ux_utils.print_exception_no_traceback()` to the error handling block in the `set_service_status_and_active_versions_from_replica` function.
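* Add `ux_utils.print_exception_no_traceback()` to the error handling block in the `_get_service_status` function (listed in the commit message, although no hunk in the diff shown below modifies this function; it is only called from `update_service_encoded`).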
* Add `ux_utils.print_exception_no_traceback()` to the error handling block in the `update_service_encoded` function.
* Add `ux_utils.print_exception_no_traceback()` to the error handling block in the `check_service_status_healthy` function.
* Add `ux_utils.print_exception_no_traceback()` to the error handling block in the `stream_replica_logs` function.
  • Loading branch information
andylizf committed Oct 17, 2024
1 parent 5dc70e8 commit 5f1a1a0
Showing 1 changed file with 32 additions and 20 deletions.
52 changes: 32 additions & 20 deletions sky/serve/serve_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,10 @@ def set_service_status_and_active_versions_from_replica(
update_mode: UpdateMode) -> None:
record = serve_state.get_service_from_name(service_name)
if record is None:
raise ValueError('The service is up-ed in an old version and does not '
'support update. Please `sky serve down` '
'it first and relaunch the service.')
with ux_utils.print_exception_no_traceback():
raise ValueError('The service is up-ed in an old version and does not '
'support update. Please `sky serve down` '
'it first and relaunch the service.')
if record['status'] == serve_state.ServiceStatus.SHUTTING_DOWN:
# When the service is shutting down, there is a period of time during which
# the controller still responds to the request, and the replica is not
Expand Down Expand Up @@ -289,7 +290,8 @@ def update_service_status() -> None:
def update_service_encoded(service_name: str, version: int, mode: str) -> str:
service_status = _get_service_status(service_name)
if service_status is None:
raise ValueError(f'Service {service_name!r} does not exist.')
with ux_utils.print_exception_no_traceback():
raise ValueError(f'Service {service_name!r} does not exist.')
controller_port = service_status['controller_port']
resp = requests.post(
_CONTROLLER_URL.format(CONTROLLER_PORT=controller_port) +
Expand All @@ -299,15 +301,19 @@ def update_service_encoded(service_name: str, version: int, mode: str) -> str:
'mode': mode,
})
if resp.status_code == 404:
raise ValueError('The service is up-ed in an old version and does not '
'support update. Please `sky serve down` '
'it first and relaunch the service. ')
with ux_utils.print_exception_no_traceback():
raise ValueError('The service is up-ed in an old version and does not '
'support update. Please `sky serve down` '
'it first and relaunch the service. ')
elif resp.status_code == 400:
raise ValueError(f'Client error during service update: {resp.text}')
with ux_utils.print_exception_no_traceback():
raise ValueError(f'Client error during service update: {resp.text}')
elif resp.status_code == 500:
raise RuntimeError(f'Server error during service update: {resp.text}')
with ux_utils.print_exception_no_traceback():
raise RuntimeError(f'Server error during service update: {resp.text}')
elif resp.status_code != 200:
raise ValueError(f'Failed to update service: {resp.text}')
with ux_utils.print_exception_no_traceback():
raise ValueError(f'Failed to update service: {resp.text}')

service_msg = resp.json()['message']
return common_utils.encode_payload(service_msg)
Expand Down Expand Up @@ -527,10 +533,12 @@ def load_service_initialization_result(payload: str) -> int:
def check_service_status_healthy(service_name: str) -> Optional[str]:
service_record = serve_state.get_service_from_name(service_name)
if service_record is None:
return f'Service {service_name!r} does not exist.'
with ux_utils.print_exception_no_traceback():
return f'Service {service_name!r} does not exist.'
if service_record['status'] == serve_state.ServiceStatus.CONTROLLER_INIT:
return (f'Service {service_name!r} is still initializing its '
'controller. Please try again later.')
with ux_utils.print_exception_no_traceback():
return (f'Service {service_name!r} is still initializing its '
'controller. Please try again later.')
return None


Expand Down Expand Up @@ -633,8 +641,9 @@ def stream_replica_logs(service_name: str, replica_id: int,
launch_log_file_name = generate_replica_launch_log_file_name(
service_name, replica_id)
if not os.path.exists(launch_log_file_name):
return (f'{colorama.Fore.RED}Replica {replica_id} doesn\'t exist.'
f'{colorama.Style.RESET_ALL}')
with ux_utils.print_exception_no_traceback():
return (f'{colorama.Fore.RED}Replica {replica_id} doesn\'t exist.'
f'{colorama.Style.RESET_ALL}')

replica_cluster_name = generate_replica_cluster_name(
service_name, replica_id)
Expand All @@ -644,8 +653,9 @@ def _get_replica_status() -> serve_state.ReplicaStatus:
for info in replica_info:
if info.replica_id == replica_id:
return info.status
raise ValueError(
_FAILED_TO_FIND_REPLICA_MSG.format(replica_id=replica_id))
with ux_utils.print_exception_no_traceback():
raise ValueError(
_FAILED_TO_FIND_REPLICA_MSG.format(replica_id=replica_id))

finish_stream = (
lambda: _get_replica_status() != serve_state.ReplicaStatus.PROVISIONING)
Expand All @@ -664,7 +674,8 @@ def _get_replica_status() -> serve_state.ReplicaStatus:
handle = global_user_state.get_handle_from_cluster_name(
replica_cluster_name)
if handle is None:
return _FAILED_TO_FIND_REPLICA_MSG.format(replica_id=replica_id)
with ux_utils.print_exception_no_traceback():
return _FAILED_TO_FIND_REPLICA_MSG.format(replica_id=replica_id)
assert isinstance(handle, backends.CloudVmRayResourceHandle), handle

# Notify user here to make sure user won't think the log is finished.
Expand All @@ -674,8 +685,9 @@ def _get_replica_status() -> serve_state.ReplicaStatus:
# Always tail the latest logs, which represent user setup & run.
returncode = backend.tail_logs(handle, job_id=None, follow=follow)
if returncode != 0:
return (f'{colorama.Fore.RED}Failed to stream logs for replica '
f'{replica_id}.{colorama.Style.RESET_ALL}')
with ux_utils.print_exception_no_traceback():
return (f'{colorama.Fore.RED}Failed to stream logs for replica '
f'{replica_id}.{colorama.Style.RESET_ALL}')
return ''


Expand Down

0 comments on commit 5f1a1a0

Please sign in to comment.