Skip to content

Commit

Permalink
feat: use exponential backoff algorithm when polling actions (ansible…
Browse files Browse the repository at this point in the history
…-collections#524)

##### SUMMARY

Replace the constant poll interval of 1 second with a truncated
exponential backoff algorithm with jitter.

Below is a sample sequence of poll intervals (in seconds) generated by the new
algorithm:
```
1.49
2.14
5.46
6.51
6.57
5.57
5.98
7.13
6.59
7.10
5.54
5.03
6.56
5.96
6.72
7.21
7.05
5.31
5.60
6.33
6.82
5.42
6.08
6.60
TOTAL: 140.77
```
  • Loading branch information
jooola authored Jul 4, 2024
1 parent ecaeac1 commit 19e586f
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 11 deletions.
2 changes: 2 additions & 0 deletions changelogs/fragments/exponential-actions-polling-interval.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
minor_changes:
- Use a truncated exponential backoff algorithm when polling actions from the API.
20 changes: 20 additions & 0 deletions plugins/module_utils/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

from contextlib import contextmanager
from random import random

from ansible.module_utils.basic import missing_required_lib

Expand Down Expand Up @@ -106,3 +107,22 @@ def cached_session(self):
yield
finally:
self._requests_session = requests.Session()


def exponential_backoff_poll_interval(*, base: float, multiplier: int, cap: float, jitter: float):
    """
    Return a poll interval function, implementing a truncated exponential backoff with jitter.

    The returned function maps a retry count to an interval computed as
    ``min(cap, base * multiplier**retries)``, then increased by a random amount
    between 0 and ``jitter`` times that truncated interval.

    :param base: Base for the exponential backoff algorithm.
    :param multiplier: Multiplier for the exponential backoff algorithm.
    :param cap: Value at which the interval is truncated.
    :param jitter: Proportion of the interval to add as random jitter.
    :return: Function taking the number of retries and returning the poll interval.
    """

    def func(retries: int) -> float:
        try:
            interval = base * multiplier**retries  # Exponential backoff
        except OverflowError:
            # For large retry counts the exponential term no longer fits in a
            # float (e.g. 1.0 * 2**1024). The interval would be truncated to
            # the cap anyway, so use the cap instead of crashing the poll loop.
            interval = cap
        interval = min(cap, interval)  # Cap backoff
        interval += random() * interval * jitter  # Add jitter
        return interval

    return func
10 changes: 9 additions & 1 deletion plugins/module_utils/hcloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
check_required_one_of,
)

from .client import ClientException, client_check_required_lib, client_get_by_name_or_id
from .client import (
ClientException,
client_check_required_lib,
client_get_by_name_or_id,
exponential_backoff_poll_interval,
)
from .vendor.hcloud import APIException, Client, HCloudException
from .vendor.hcloud.actions import ActionException
from .version import version
Expand Down Expand Up @@ -81,6 +86,9 @@ def _build_client(self) -> None:
api_endpoint=self.module.params["api_endpoint"],
application_name="ansible-module",
application_version=version,
# Total waiting time before timeout is > 117.0
poll_interval=exponential_backoff_poll_interval(base=1.0, multiplier=2, cap=5.0, jitter=0.5),
poll_max_retries=25,
)

def _client_get_by_name_or_id(self, resource: str, param: str | int):
Expand Down
2 changes: 1 addition & 1 deletion plugins/modules/certificate.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def _create_certificate(self):
resp = self.client.certificates.create_managed(**params)
# Action should take 60 to 90 seconds on average, wait for 5m to
# allow DNS or Let's Encrypt slowdowns.
resp.action.wait_until_finished(max_retries=300)
resp.action.wait_until_finished(max_retries=62) # 62 retries >= 302 seconds
except HCloudException as exception:
self.fail_json_hcloud(exception)

Expand Down
19 changes: 10 additions & 9 deletions plugins/modules/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ def _create_server(self):
self.result["root_password"] = resp.root_password
# Action should take 60 to 90 seconds on average, but can be >10m when creating a
# server from a custom image
resp.action.wait_until_finished(max_retries=1800)
resp.action.wait_until_finished(max_retries=362) # 362 retries >= 1802 seconds
for action in resp.next_actions:
action.wait_until_finished()

Expand Down Expand Up @@ -671,17 +671,18 @@ def _update_server_server_type(self) -> None:

self.stop_server_if_forced()

upgrade_disk = self.module.params.get("upgrade_disk")
# Upgrading a server takes 160 seconds on average, upgrading the disk should
# take more time
upgrade_timeout = 600 if upgrade_disk else 180

if not self.module.check_mode:
upgrade_disk = self.module.params.get("upgrade_disk")

action = self.hcloud_server.change_type(
server_type=self._get_server_type(),
upgrade_disk=upgrade_disk,
)
action.wait_until_finished(max_retries=upgrade_timeout)
# Upgrading a server takes 160 seconds on average, upgrading the disk should
# take more time
# 122 retries >= 602 seconds
# 38 retries >= 182 seconds
action.wait_until_finished(max_retries=122 if upgrade_disk else 38)
self._mark_as_changed()

def _update_server_ip(self, kind: Literal["ipv4", "ipv6"]) -> None:
Expand Down Expand Up @@ -867,9 +868,9 @@ def rebuild_server(self):
try:
if not self.module.check_mode:
image = self._get_image(self.hcloud_server.server_type)
# When we rebuild the server progress takes some more time.
resp = self.client.servers.rebuild(self.hcloud_server, image)
resp.action.wait_until_finished(1000)
# When we rebuild the server progress takes some more time.
resp.action.wait_until_finished(max_retries=202) # 202 retries >= 1002 seconds
self._mark_as_changed()

self._get_server()
Expand Down
14 changes: 14 additions & 0 deletions tests/unit/module_utils/test_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from __future__ import annotations

from ansible_collections.hetzner.hcloud.plugins.module_utils.client import (
exponential_backoff_poll_interval,
)


def test_exponential_backoff_poll_interval():
    """
    Check the backoff sequence without jitter: the intervals double from the
    base until they reach the cap, and 25 retries add up to 117 seconds.
    """
    max_retries = 25
    compute_interval = exponential_backoff_poll_interval(
        base=1.0,
        multiplier=2,
        cap=5.0,
        jitter=0.0,
    )

    intervals = list(map(compute_interval, range(max_retries)))

    total = sum(intervals)
    assert total == 117.0
    first_six = intervals[:6]
    assert first_six == [1.0, 2.0, 4.0, 5.0, 5.0, 5.0]

0 comments on commit 19e586f

Please sign in to comment.