Skip to content

Commit

Permalink
[UX] Catch any exception for the spot queue fetching failure (skypilot-org#1757)
Browse files Browse the repository at this point in the history

* Catch any exception for the spot queue fetching failure.

* Fix keyboard interrupt

* lint

* Add comment
  • Loading branch information
Michaelvll authored and Sumanth committed Mar 14, 2023
1 parent 4620034 commit 5bff138
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 16 deletions.
15 changes: 13 additions & 2 deletions sky/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1439,6 +1439,9 @@ def _get_spot_jobs(
except RuntimeError:
msg = ('Failed to query spot jobs due to connection '
'issues. Try again later.')
except Exception as e: # pylint: disable=broad-except
msg = ('Failed to query spot jobs: '
f'{common_utils.format_exception(e, use_bracket=True)}')
else:
max_jobs_to_show = (_NUM_SPOT_JOBS_TO_SHOW_IN_STATUS
if limit_num_jobs_to_show else None)
Expand Down Expand Up @@ -1565,7 +1568,15 @@ def status(all: bool, refresh: bool, show_spot_jobs: bool, clusters: List[str]):
click.echo(f'\n{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
f'Managed spot jobs{colorama.Style.RESET_ALL}')
with log_utils.safe_rich_status('[cyan]Checking spot jobs[/]'):
num_in_progress_jobs, msg = spot_jobs_future.get()
try:
num_in_progress_jobs, msg = spot_jobs_future.get()
except KeyboardInterrupt:
pool.terminate()
# Set to -1, so that the controller is not considered
# down, and the hint for showing sky spot queue
# will still be shown.
num_in_progress_jobs = -1
msg = 'KeyboardInterrupt'

try:
pool.close()
Expand Down Expand Up @@ -1595,7 +1606,7 @@ def status(all: bool, refresh: bool, show_spot_jobs: bool, clusters: List[str]):
'shown)')
job_info += '. '
hints.append(
f'* {job_info}To see all jobs: {colorama.Style.BRIGHT}'
f'* {job_info}To see all spot jobs: {colorama.Style.BRIGHT}'
f'sky spot queue{colorama.Style.RESET_ALL}')

if num_pending_autostop > 0:
Expand Down
21 changes: 13 additions & 8 deletions sky/clouds/service_catalog/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,31 @@
import threading

_thread_local_config = threading.local()
# Whether the caller requires the catalog to be narrowed down
# to the account-specific catalog (e.g., removing regions not
# enabled for the current account) or just the raw catalog
# fetched from SkyPilot catalog service. The former is used
# for launching clusters, while the latter for commands like
# `show-gpus`.
_thread_local_config.use_default_catalog = False


@contextlib.contextmanager
def _set_use_default_catalog(value: bool):
old_value = _thread_local_config.use_default_catalog
old_value = get_use_default_catalog()
_thread_local_config.use_default_catalog = value
try:
yield
finally:
_thread_local_config.use_default_catalog = old_value


# Whether the caller requires the catalog to be narrowed down
# to the account-specific catalog (e.g., removing regions not
# enabled for the current account) or just the raw catalog
# fetched from SkyPilot catalog service. The former is used
# for launching clusters, while the latter for commands like
# `show-gpus`.
def get_use_default_catalog() -> bool:
if not hasattr(_thread_local_config, 'use_default_catalog'):
# Should not set it globally, as the global assignment
# will be executed only once if the module is imported
# in the main thread, and will not be executed in other
# threads.
_thread_local_config.use_default_catalog = False
return _thread_local_config.use_default_catalog


Expand Down
9 changes: 7 additions & 2 deletions sky/sky_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ def format(self, record):
_root_logger = logging.getLogger('sky')
_default_handler = None
_logging_config = threading.local()
_logging_config.is_silent = False

# All code inside the library should use sky_logging.print()
# rather than print().
Expand Down Expand Up @@ -79,7 +78,7 @@ def silent():
global print
global _logging_config
previous_level = _root_logger.level
previous_is_silent = _logging_config.is_silent
previous_is_silent = is_silent()
previous_print = print

# Turn off logger
Expand All @@ -95,4 +94,10 @@ def silent():


def is_silent():
if not hasattr(_logging_config, 'is_silent'):
# Should not set it globally, as the global assignment
# will be executed only once if the module is imported
# in the main thread, and will not be executed in other
# threads.
_logging_config.is_silent = False
return _logging_config.is_silent
2 changes: 1 addition & 1 deletion sky/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def format_exception(e: Union[Exception, SystemExit],
bright = colorama.Style.BRIGHT
reset = colorama.Style.RESET_ALL
if use_bracket:
return f'{bright}[{class_fullname(e.__class__)}]:{reset} {e}'
return f'{bright}[{class_fullname(e.__class__)}]{reset} {e}'
return f'{bright}{class_fullname(e.__class__)}:{reset} {e}'


Expand Down
3 changes: 0 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import os
import pathlib
import pytest
import tempfile
import textwrap
from typing import List

# Usage: use
Expand Down

0 comments on commit 5bff138

Please sign in to comment.