From 83d2936abbbe3a938efb35c07f058b78ed2f9af9 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Mon, 15 Aug 2022 12:33:49 -0700 Subject: [PATCH] Fix spot recovery without cloud specified (#1077) * Fix spot recovery without cloud specified * fix * format * format --- sky/data/storage.py | 3 +++ sky/execution.py | 6 ++---- sky/spot/recovery_strategy.py | 4 +++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/sky/data/storage.py b/sky/data/storage.py index ce6d8bf3ada..b3e7c64aaac 100644 --- a/sky/data/storage.py +++ b/sky/data/storage.py @@ -340,6 +340,9 @@ def __init__(self, assert mode in StorageMode self.sync_on_reconstruction = sync_on_reconstruction + # TODO(romilb, zhwu): This is a workaround to support storage deletion + # for spot. Once sky storage supports forced management for external + # buckets, this can be deprecated. self.force_delete = False # Validate and correct inputs if necessary diff --git a/sky/execution.py b/sky/execution.py index a221aee1107..6a3df036b43 100644 --- a/sky/execution.py +++ b/sky/execution.py @@ -400,10 +400,8 @@ def spot_launch( # Copy the local source to a bucket. The task will not be executed locally, # so we need to copy the files to the bucket manually here before sending to # the remote spot controller. - with backend_utils.safe_console_status( - '[bold cyan]Copying files to buckets'): - with backend_utils.suppress_output(): - task.add_storage_mounts() + with backend_utils.suppress_output(): + task.add_storage_mounts() # Replace the source field that is local path in all storage_mounts with # bucket URI and remove the name field. diff --git a/sky/spot/recovery_strategy.py b/sky/spot/recovery_strategy.py index 3b40a8858ce..0a41569e5cc 100644 --- a/sky/spot/recovery_strategy.py +++ b/sky/spot/recovery_strategy.py @@ -172,8 +172,10 @@ def recover(self) -> float: resources = list(task.resources)[0] original_resources = resources + launched_cloud = handle.launched_resources.cloud launched_region = handle.launched_resources.region - new_resources = resources.copy(region=launched_region) + new_resources = resources.copy(cloud=launched_cloud, + region=launched_region) task.set_resources({new_resources}) launched_time = self.launch(raise_on_failure=False) # Restore the original dag, i.e. reset the region constraint.