From eda9ee2a4023efd7de3331fdf62f2a19b9b8f451 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Thu, 9 Mar 2023 21:46:58 -0800 Subject: [PATCH 1/4] Catch any exception for the spot queue fetching failure. --- sky/cli.py | 3 +++ sky/utils/common_utils.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sky/cli.py b/sky/cli.py index e1084e56839..36919a4c65d 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -1439,6 +1439,9 @@ def _get_spot_jobs( except RuntimeError: msg = ('Failed to query spot jobs due to connection ' 'issues. Try again later.') + except Exception as e: # pylint: disable=broad-except + msg = ('Failed to query spot jobs: ' + f'{common_utils.format_exception(e, use_bracket=True)}') else: max_jobs_to_show = (_NUM_SPOT_JOBS_TO_SHOW_IN_STATUS if limit_num_jobs_to_show else None) diff --git a/sky/utils/common_utils.py b/sky/utils/common_utils.py index 51fcfcd5079..356bba386e8 100644 --- a/sky/utils/common_utils.py +++ b/sky/utils/common_utils.py @@ -331,7 +331,7 @@ def format_exception(e: Union[Exception, SystemExit], bright = colorama.Style.BRIGHT reset = colorama.Style.RESET_ALL if use_bracket: - return f'{bright}[{class_fullname(e.__class__)}]:{reset} {e}' + return f'{bright}[{class_fullname(e.__class__)}]{reset} {e}' return f'{bright}{class_fullname(e.__class__)}:{reset} {e}' From dd6204081c2f6be1e1b5cfa42160de63eb3c7122 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Thu, 9 Mar 2023 22:29:07 -0800 Subject: [PATCH 2/4] Fix keyboard interrupt --- sky/cli.py | 12 ++++++++++-- sky/clouds/service_catalog/config.py | 17 +++++++++-------- sky/sky_logging.py | 9 +++++++-- tests/conftest.py | 3 --- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 36919a4c65d..ddb6f807bc9 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -1568,7 +1568,15 @@ def status(all: bool, refresh: bool, show_spot_jobs: bool, clusters: List[str]): click.echo(f'\n{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' f'Managed spot jobs{colorama.Style.RESET_ALL}') with log_utils.safe_rich_status('[cyan]Checking spot jobs[/]'): - num_in_progress_jobs, msg = spot_jobs_future.get() + try: + num_in_progress_jobs, msg = spot_jobs_future.get() + except KeyboardInterrupt: + pool.terminate() + # Set to -1, so that the controller is not considered + # down, and the hint for showing sky spot queue + # will still be shown. + num_in_progress_jobs = -1 + msg = f'KeyboardInterrupt' try: pool.close() @@ -1598,7 +1606,7 @@ def status(all: bool, refresh: bool, show_spot_jobs: bool, clusters: List[str]): 'shown)') job_info += '. ' hints.append( - f'* {job_info}To see all jobs: {colorama.Style.BRIGHT}' + f'* {job_info}To see all spot jobs: {colorama.Style.BRIGHT}' f'sky spot queue{colorama.Style.RESET_ALL}') if num_pending_autostop > 0: diff --git a/sky/clouds/service_catalog/config.py b/sky/clouds/service_catalog/config.py index 50e693246de..175803c7c5d 100644 --- a/sky/clouds/service_catalog/config.py +++ b/sky/clouds/service_catalog/config.py @@ -5,18 +5,11 @@ import threading _thread_local_config = threading.local() -# Whether the caller requires the catalog to be narrowed down -# to the account-specific catalog (e.g., removing regions not -# enabled for the current account) or just the raw catalog -# fetched from SkyPilot catalog service. The former is used -# for launching clusters, while the latter for commands like -# `show-gpus`. -_thread_local_config.use_default_catalog = False @contextlib.contextmanager def _set_use_default_catalog(value: bool): - old_value = _thread_local_config.use_default_catalog + old_value = get_use_default_catalog() _thread_local_config.use_default_catalog = value try: yield @@ -24,7 +17,15 @@ def _set_use_default_catalog(value: bool): _thread_local_config.use_default_catalog = old_value +# Whether the caller requires the catalog to be narrowed down +# to the account-specific catalog (e.g., removing regions not +# enabled for the current account) or just the raw catalog +# fetched from SkyPilot catalog service. The former is used +# for launching clusters, while the latter for commands like +# `show-gpus`. def get_use_default_catalog() -> bool: + if not hasattr(_thread_local_config, 'use_default_catalog'): + _thread_local_config.use_default_catalog = False return _thread_local_config.use_default_catalog diff --git a/sky/sky_logging.py b/sky/sky_logging.py index 09287ce9925..a9928f6eb03 100644 --- a/sky/sky_logging.py +++ b/sky/sky_logging.py @@ -31,7 +31,6 @@ def format(self, record): _root_logger = logging.getLogger('sky') _default_handler = None _logging_config = threading.local() -_logging_config.is_silent = False # All code inside the library should use sky_logging.print() # rather than print(). @@ -79,7 +78,7 @@ def silent(): global print global _logging_config previous_level = _root_logger.level - previous_is_silent = _logging_config.is_silent + previous_is_silent = is_silent() previous_print = print # Turn off logger @@ -95,4 +94,10 @@ def silent(): def is_silent(): + if not hasattr(_logging_config, 'is_silent'): + # Should not set it globally, as the global assignment + # will be executed only once if the module is imported + # in the main thread, and will not be executed in other + # threads. + _logging_config.is_silent = False return _logging_config.is_silent diff --git a/tests/conftest.py b/tests/conftest.py index ed83f8d1e10..9c20348fd1a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,5 @@ -import os -import pathlib import pytest import tempfile -import textwrap from typing import List # Usage: use From 4e586567f1b191f65d1a41fb4a7aa5aab2c19583 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Thu, 9 Mar 2023 22:36:28 -0800 Subject: [PATCH 3/4] lint --- sky/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/cli.py b/sky/cli.py index ddb6f807bc9..0c8c0d58658 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -1576,7 +1576,7 @@ def status(all: bool, refresh: bool, show_spot_jobs: bool, clusters: List[str]): # down, and the hint for showing sky spot queue # will still be shown. num_in_progress_jobs = -1 - msg = f'KeyboardInterrupt' + msg = 'KeyboardInterrupt' try: pool.close() From 8268777b3abcc45aa52460a1228425a3ad6afe91 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Fri, 10 Mar 2023 11:52:52 -0800 Subject: [PATCH 4/4] Add comment --- sky/clouds/service_catalog/config.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sky/clouds/service_catalog/config.py b/sky/clouds/service_catalog/config.py index 175803c7c5d..249f807d659 100644 --- a/sky/clouds/service_catalog/config.py +++ b/sky/clouds/service_catalog/config.py @@ -25,6 +25,10 @@ def _set_use_default_catalog(value: bool): # `show-gpus`. def get_use_default_catalog() -> bool: if not hasattr(_thread_local_config, 'use_default_catalog'): + # Should not set it globally, as the global assignment + # will be executed only once if the module is imported + # in the main thread, and will not be executed in other + # threads. _thread_local_config.use_default_catalog = False return _thread_local_config.use_default_catalog