Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exponential backoff period for 5xx errors and enabled retries by default #1444

Merged
merged 4 commits into from
May 18, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 16 additions & 17 deletions datadog/internal/transport/custom_transport.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,23 @@ import (
"bytes"
"context"
"io/ioutil"
"math"
"net/http"
"strconv"
"time"
)

var (
defaultHTTPRetryDuration = 5 * time.Second
defaultHTTPRetryTimeout = 60 * time.Second
rateLimitResetHeader = "X-Ratelimit-Reset"
defaultBackOffMultiplier float64 = 2
skarimo marked this conversation as resolved.
Show resolved Hide resolved
defaultBackOffBase float64 = 2
defaultHTTPRetryTimeout = 60 * time.Second
rateLimitResetHeader = "X-Ratelimit-Reset"
)

// CustomTransport holds DefaultTransport configuration and is used for custom http error handling
type CustomTransport struct {
defaultTransport http.RoundTripper
httpRetryDuration time.Duration
httpRetryTimeout time.Duration
defaultTransport http.RoundTripper
httpRetryTimeout time.Duration
}

// CustomTransportOptions holds the options for CustomTransport
Expand Down Expand Up @@ -51,17 +52,11 @@ func (t *CustomTransport) RoundTrip(req *http.Request) (*http.Response, error) {
}

// Check if request should be retried and get retry time
retryDuration, retry := t.retryRequest(resp)
retryDuration, retry := t.retryRequest(resp, retryCount)
if !retry {
return resp, respErr
}

// Calculate retryDuration if nil
if retryDuration == nil {
newRetryDurationVal := time.Duration(retryCount) * t.httpRetryDuration
retryDuration = &newRetryDurationVal
}

select {
case <-ctx.Done():
return resp, respErr
Expand All @@ -85,7 +80,7 @@ func (t *CustomTransport) copyRequest(r *http.Request) *http.Request {
return &newRequest
}

func (t *CustomTransport) retryRequest(response *http.Response) (*time.Duration, bool) {
func (t *CustomTransport) retryRequest(response *http.Response, retryCount int) (*time.Duration, bool) {
skarimo marked this conversation as resolved.
Show resolved Hide resolved
if v := response.Header.Get(rateLimitResetHeader); v != "" && response.StatusCode == 429 {
vInt, err := strconv.ParseInt(v, 10, 64)
if err != nil {
Expand All @@ -96,7 +91,12 @@ func (t *CustomTransport) retryRequest(response *http.Response) (*time.Duration,
}

if response.StatusCode >= 500 {
return nil, true
// Calculate the retry val (base * multiplier^retryCount)
retryVal := defaultBackOffBase * math.Pow(defaultBackOffMultiplier, float64(retryCount))
// retry duration shouldn't exceed the transport's configured retry timeout period
retryVal = math.Min(float64(t.httpRetryTimeout/time.Second), retryVal)
retryDuration := time.Duration(retryVal) * time.Second
return &retryDuration, true
}

return nil, false
Expand All @@ -110,8 +110,7 @@ func NewCustomTransport(t http.RoundTripper, opt CustomTransportOptions) *Custom
}

ct := CustomTransport{
defaultTransport: t,
httpRetryDuration: defaultHTTPRetryDuration,
defaultTransport: t,
}

if opt.Timeout != nil {
Expand Down
4 changes: 2 additions & 2 deletions datadog/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ func Provider() *schema.Provider {
"http_client_retry_enabled": {
Type: schema.TypeBool,
Optional: true,
DefaultFunc: schema.EnvDefaultFunc("DD_HTTP_CLIENT_RETRY_ENABLED", false),
Description: "Enables request retries on HTTP status codes 429 and 5xx.",
DefaultFunc: schema.EnvDefaultFunc("DD_HTTP_CLIENT_RETRY_ENABLED", true),
Description: "Enables request retries on HTTP status codes 429 and 5xx. Defaults to `true`.",
},
"http_client_retry_timeout": {
Type: schema.TypeInt,
Expand Down
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,6 @@ provider "datadog" {
- `api_key` (String) (Required unless validate is false) Datadog API key. This can also be set via the DD_API_KEY environment variable.
- `api_url` (String) The API URL. This can also be set via the DD_HOST environment variable. Note that this URL must not end with the /api/ path. For example, https://api.datadoghq.com/ is a correct value, while https://api.datadoghq.com/api/ is not. If you're working with the "EU" version of Datadog, use https://api.datadoghq.eu/.
- `app_key` (String) (Required unless validate is false) Datadog APP key. This can also be set via the DD_APP_KEY environment variable.
- `http_client_retry_enabled` (Boolean) Enables request retries on HTTP status codes 429 and 5xx.
- `http_client_retry_enabled` (Boolean) Enables request retries on HTTP status codes 429 and 5xx. Defaults to `true`.
- `http_client_retry_timeout` (Number) The HTTP request retry timeout period.
- `validate` (Boolean) Enables validation of the provided API and APP keys during provider initialization. Default is true. When false, api_key and app_key won't be checked.
2 changes: 1 addition & 1 deletion docs/resources/downtime.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ resource "datadog_downtime" "foo" {

Required:

- `type` (String) One of `days`, `weeks`, `months`, or `years`
- `type` (String) One of `days`, `weeks`, `months`, `years`, or `rrule`.

Optional:

Expand Down