Skip to content

Commit

Permalink
Remove Exponential Backoff for Retry Until Up (skypilot-org#2821)
Browse files Browse the repository at this point in the history
* remove exponential backoff

* add back random jitter

* format

* use backoff

* format

* add comment

* rerun checks

* format

* pylint

* form

* change to 30
  • Loading branch information
rohanvaidya45 authored and remyleone committed Dec 22, 2023
1 parent 462429a commit 4695c25
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions sky/backends/cloud_vm_ray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@

# Time gap between retries after failing to provision in all possible places.
# Used only if --retry-until-up is set.
_RETRY_UNTIL_UP_INIT_GAP_SECONDS = 60
_RETRY_UNTIL_UP_INIT_GAP_SECONDS = 30

# The maximum retry count for fetching IP address.
_FETCH_IP_MAX_ATTEMPTS = 3
Expand Down Expand Up @@ -2873,7 +2873,13 @@ def _provision(
# TODO(suquark): once we have sky on PyPI, we should directly
# install sky from PyPI.
local_wheel_path, wheel_hash = wheel_utils.build_sky_wheel()
backoff = common_utils.Backoff(_RETRY_UNTIL_UP_INIT_GAP_SECONDS)
# Provision requests most often fail because resources are
# unavailable rather than because of rate limiting; to shorten
# the user's wait between retries, the backoff is deliberately
# not exponential.
backoff = common_utils.Backoff(
initial_backoff=_RETRY_UNTIL_UP_INIT_GAP_SECONDS,
max_backoff_factor=1)
attempt_cnt = 1
while True:
# For on-demand instances, RetryingVmProvisioner will retry
Expand Down Expand Up @@ -2927,7 +2933,7 @@ def _provision(
f'{colorama.Style.BRIGHT}=== Retry until up ==='
f'{colorama.Style.RESET_ALL}\n'
f'Retrying provisioning after {gap_seconds:.0f}s '
'(exponential backoff with random jittering). '
'(backoff with random jittering). '
f'Already tried {attempt_cnt} attempt{plural}.')
attempt_cnt += 1
time.sleep(gap_seconds)
Expand Down

0 comments on commit 4695c25

Please sign in to comment.