diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 6007bf377266..dfcbe32df904 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -87,7 +87,7 @@ # Time gap between retries after failing to provision in all possible places. # Used only if --retry-until-up is set. -_RETRY_UNTIL_UP_INIT_GAP_SECONDS = 60 +_RETRY_UNTIL_UP_INIT_GAP_SECONDS = 30 # The maximum retry count for fetching IP address. _FETCH_IP_MAX_ATTEMPTS = 3 @@ -2873,7 +2873,13 @@ def _provision( # TODO(suquark): once we have sky on PyPI, we should directly # install sky from PyPI. local_wheel_path, wheel_hash = wheel_utils.build_sky_wheel() - backoff = common_utils.Backoff(_RETRY_UNTIL_UP_INIT_GAP_SECONDS) + # Provision requests most often fail because resources + # are unavailable, not because of rate limiting; to + # shorten the wait for users, we do not make the + # backoff exponential. + backoff = common_utils.Backoff( + initial_backoff=_RETRY_UNTIL_UP_INIT_GAP_SECONDS, + max_backoff_factor=1) attempt_cnt = 1 while True: # For on-demand instances, RetryingVmProvisioner will retry @@ -2927,7 +2933,7 @@ def _provision( f'{colorama.Style.BRIGHT}=== Retry until up ===' f'{colorama.Style.RESET_ALL}\n' f'Retrying provisioning after {gap_seconds:.0f}s ' - '(exponential backoff with random jittering). ' + '(backoff with random jittering). ' f'Already tried {attempt_cnt} attempt{plural}.') attempt_cnt += 1 time.sleep(gap_seconds)