From 2a801855816c6d4cdc9116035315bbfa8cbb9836 Mon Sep 17 00:00:00 2001 From: Jason Del Ponte Date: Tue, 13 Aug 2019 09:37:19 -0700 Subject: [PATCH] aws/request: Reorganize retry behavior to reduce separation (#2744) Reorganizes the SDK's retry utilities to follow a consistent code path. Request.IsErrorRetryable is the primary entry pointer for determining if a request error should be retryable instead of split between Request.Send and DefaultRetryer calling IsErrorRetryable. This also gives the implementation of the Retryer interface consistent control over when a request will be retried. Also adds support for request and service specific API error retry and throttling to be enabled by specifying API error codes before a request is sent. The "RetryErrorCodes" and "ThrottleErrorCodes" members were added to request.Request struct. These are used by the Request's IsErrorRetryable and IsErrorThrottle respectively to specify additional API error codes that identify the failed request attempt as needing to be retried or throttled. The DefaultRetryer uses both of these methods when determining if a failed request attempt should be retried. Related to #1376 --- aws/client/default_retryer.go | 31 ++---- aws/client/default_retryer_test.go | 2 +- aws/corehandlers/handlers.go | 60 +++++----- aws/request/http_request_retry_test.go | 6 +- aws/request/request.go | 79 ++++---------- aws/request/request_retry_test.go | 72 ++++++++++-- aws/request/request_test.go | 12 +- aws/request/retryer.go | 145 +++++++++++++++++++++---- 8 files changed, 259 insertions(+), 148 deletions(-) diff --git a/aws/client/default_retryer.go b/aws/client/default_retryer.go index a397b0d044c..1df4b992d50 100644 --- a/aws/client/default_retryer.go +++ b/aws/client/default_retryer.go @@ -14,12 +14,12 @@ import ( // struct and override the specific methods. For example, to override only // the MaxRetries method: // -// type retryer struct { -// client.DefaultRetryer -// } +// type retryer struct { +// client.DefaultRetryer +// } // -// // This implementation always has 100 max retries -// func (d retryer) MaxRetries() int { return 100 } +// // This implementation always has 100 max retries +// func (d retryer) MaxRetries() int { return 100 } type DefaultRetryer struct { NumMaxRetries int } @@ -34,8 +34,8 @@ func (d DefaultRetryer) MaxRetries() int { func (d DefaultRetryer) RetryRules(r *request.Request) time.Duration { // Set the upper limit of delay in retrying at ~five minutes minTime := 30 - throttle := d.shouldThrottle(r) - if throttle { + isThrottle := r.IsErrorThrottle() + if isThrottle { if delay, ok := getRetryDelay(r); ok { return delay } @@ -44,7 +44,7 @@ func (d DefaultRetryer) RetryRules(r *request.Request) time.Duration { } retryCount := r.RetryCount - if throttle && retryCount > 8 { + if isThrottle && retryCount > 8 { retryCount = 8 } else if retryCount > 13 { retryCount = 13 @@ -65,21 +65,8 @@ func (d DefaultRetryer) ShouldRetry(r *request.Request) bool { if r.HTTPResponse.StatusCode >= 500 && r.HTTPResponse.StatusCode != 501 { return true } - return r.IsErrorRetryable() || d.shouldThrottle(r) -} - -// ShouldThrottle returns true if the request should be throttled. -func (d DefaultRetryer) shouldThrottle(r *request.Request) bool { - switch r.HTTPResponse.StatusCode { - case 429: - case 502: - case 503: - case 504: - default: - return r.IsErrorThrottle() - } - return true + return r.IsErrorRetryable() || r.IsErrorThrottle() } // This will look in the Retry-After header, RFC 7231, for how long diff --git a/aws/client/default_retryer_test.go b/aws/client/default_retryer_test.go index fddba4e49e2..0c6e068a1db 100644 --- a/aws/client/default_retryer_test.go +++ b/aws/client/default_retryer_test.go @@ -60,7 +60,7 @@ func TestRetryThrottleStatusCodes(t *testing.T) { d := DefaultRetryer{NumMaxRetries: 10} for i, c := range cases { - throttle := d.shouldThrottle(&c.r) + throttle := c.r.IsErrorThrottle() retry := d.ShouldRetry(&c.r) if e, a := c.expectThrottle, throttle; e != a { diff --git a/aws/corehandlers/handlers.go b/aws/corehandlers/handlers.go index f8853d78af2..0c60e612ea5 100644 --- a/aws/corehandlers/handlers.go +++ b/aws/corehandlers/handlers.go @@ -159,9 +159,9 @@ func handleSendError(r *request.Request, err error) { Body: ioutil.NopCloser(bytes.NewReader([]byte{})), } } - // Catch all other request errors. + // Catch all request errors, and let the default retrier determine + // if the error is retryable. r.Error = awserr.New("RequestError", "send request failed", err) - r.Retryable = aws.Bool(true) // network errors are retryable // Override the error with a context canceled error, if that was canceled. ctx := r.Context() @@ -184,37 +184,39 @@ var ValidateResponseHandler = request.NamedHandler{Name: "core.ValidateResponseH // AfterRetryHandler performs final checks to determine if the request should // be retried and how long to delay. -var AfterRetryHandler = request.NamedHandler{Name: "core.AfterRetryHandler", Fn: func(r *request.Request) { - // If one of the other handlers already set the retry state - // we don't want to override it based on the service's state - if r.Retryable == nil || aws.BoolValue(r.Config.EnforceShouldRetryCheck) { - r.Retryable = aws.Bool(r.ShouldRetry(r)) - } +var AfterRetryHandler = request.NamedHandler{ + Name: "core.AfterRetryHandler", + Fn: func(r *request.Request) { + // If one of the other handlers already set the retry state + // we don't want to override it based on the service's state + if r.Retryable == nil || aws.BoolValue(r.Config.EnforceShouldRetryCheck) { + r.Retryable = aws.Bool(r.ShouldRetry(r)) + } - if r.WillRetry() { - r.RetryDelay = r.RetryRules(r) + if r.WillRetry() { + r.RetryDelay = r.RetryRules(r) - if sleepFn := r.Config.SleepDelay; sleepFn != nil { - // Support SleepDelay for backwards compatibility and testing - sleepFn(r.RetryDelay) - } else if err := aws.SleepWithContext(r.Context(), r.RetryDelay); err != nil { - r.Error = awserr.New(request.CanceledErrorCode, - "request context canceled", err) - r.Retryable = aws.Bool(false) - return - } + if sleepFn := r.Config.SleepDelay; sleepFn != nil { + // Support SleepDelay for backwards compatibility and testing + sleepFn(r.RetryDelay) + } else if err := aws.SleepWithContext(r.Context(), r.RetryDelay); err != nil { + r.Error = awserr.New(request.CanceledErrorCode, + "request context canceled", err) + r.Retryable = aws.Bool(false) + return + } - // when the expired token exception occurs the credentials - // need to be expired locally so that the next request to - // get credentials will trigger a credentials refresh. - if r.IsErrorExpired() { - r.Config.Credentials.Expire() - } + // when the expired token exception occurs the credentials + // need to be expired locally so that the next request to + // get credentials will trigger a credentials refresh. + if r.IsErrorExpired() { + r.Config.Credentials.Expire() + } - r.RetryCount++ - r.Error = nil - } -}} + r.RetryCount++ + r.Error = nil + } + }} // ValidateEndpointHandler is a request handler to validate a request had the // appropriate Region and Endpoint set. Will set r.Error if the endpoint or diff --git a/aws/request/http_request_retry_test.go b/aws/request/http_request_retry_test.go index fcdd1ce819b..8cc3b043d5e 100644 --- a/aws/request/http_request_retry_test.go +++ b/aws/request/http_request_retry_test.go @@ -1,7 +1,6 @@ package request_test import ( - "errors" "strings" "testing" @@ -14,14 +13,15 @@ func TestRequestCancelRetry(t *testing.T) { c := make(chan struct{}) reqNum := 0 - s := mock.NewMockClient(aws.NewConfig().WithMaxRetries(10)) + s := mock.NewMockClient(&aws.Config{ + MaxRetries: aws.Int(1), + }) s.Handlers.Validate.Clear() s.Handlers.Unmarshal.Clear() s.Handlers.UnmarshalMeta.Clear() s.Handlers.UnmarshalError.Clear() s.Handlers.Send.PushFront(func(r *request.Request) { reqNum++ - r.Error = errors.New("net/http: request canceled") }) out := &testData{} r := s.NewRequest(&request.Operation{Name: "Operation"}, nil, out) diff --git a/aws/request/request.go b/aws/request/request.go index e7c9b2b61af..8e332cce6a6 100644 --- a/aws/request/request.go +++ b/aws/request/request.go @@ -4,7 +4,6 @@ import ( "bytes" "fmt" "io" - "net" "net/http" "net/url" "reflect" @@ -65,6 +64,15 @@ type Request struct { LastSignedAt time.Time DisableFollowRedirects bool + // Additional API error codes that should be retried. IsErrorRetryable + // will consider these codes in addition to its built in cases. + RetryErrorCodes []string + + // Additional API error codes that should be retried with throttle backoff + // delay. IsErrorThrottle will consider these codes in addition to its + // built in cases. + ThrottleErrorCodes []string + // A value greater than 0 instructs the request to be signed as Presigned URL // You should not set this field directly. Instead use Request's // Presign or PresignRequest methods. @@ -498,21 +506,17 @@ func (r *Request) Send() error { if err := r.sendRequest(); err == nil { return nil - } else if !shouldRetryError(r.Error) { + } + r.Handlers.Retry.Run(r) + r.Handlers.AfterRetry.Run(r) + + if r.Error != nil || !aws.BoolValue(r.Retryable) { + return r.Error + } + + if err := r.prepareRetry(); err != nil { + r.Error = err return err - } else { - r.Handlers.Retry.Run(r) - r.Handlers.AfterRetry.Run(r) - - if r.Error != nil || !aws.BoolValue(r.Retryable) { - return r.Error - } - - if err := r.prepareRetry(); err != nil { - r.Error = err - return err - } - continue } } } @@ -596,51 +600,6 @@ func AddToUserAgent(r *Request, s string) { r.HTTPRequest.Header.Set("User-Agent", s) } -type temporary interface { - Temporary() bool -} - -func shouldRetryError(origErr error) bool { - switch err := origErr.(type) { - case awserr.Error: - if err.Code() == CanceledErrorCode { - return false - } - return shouldRetryError(err.OrigErr()) - case *url.Error: - if strings.Contains(err.Error(), "connection refused") { - // Refused connections should be retried as the service may not yet - // be running on the port. Go TCP dial considers refused - // connections as not temporary. - return true - } - // *url.Error only implements Temporary after golang 1.6 but since - // url.Error only wraps the error: - return shouldRetryError(err.Err) - case temporary: - if netErr, ok := err.(*net.OpError); ok && netErr.Op == "dial" { - return true - } - // If the error is temporary, we want to allow continuation of the - // retry process - return err.Temporary() || isErrConnectionReset(origErr) - case nil: - // `awserr.Error.OrigErr()` can be nil, meaning there was an error but - // because we don't know the cause, it is marked as retryable. See - // TestRequest4xxUnretryable for an example. - return true - default: - switch err.Error() { - case "net/http: request canceled", - "net/http: request canceled while waiting for connection": - // known 1.5 error case when an http request is cancelled - return false - } - // here we don't know the error; so we allow a retry. - return true - } -} - // SanitizeHostForHeader removes default port from host and updates request.Host func SanitizeHostForHeader(r *http.Request) { host := getHost(r) diff --git a/aws/request/request_retry_test.go b/aws/request/request_retry_test.go index 60a8dfbf147..c24fc9f422e 100644 --- a/aws/request/request_retry_test.go +++ b/aws/request/request_retry_test.go @@ -31,12 +31,12 @@ func TestShouldRetryError_timeout(t *testing.T) { tr := &http.Transport{} defer tr.CloseIdleConnections() - cli := http.Client{ + client := http.Client{ Timeout: time.Nanosecond, Transport: tr, } - resp, err := cli.Do(newRequest(t, "https://179.179.179.179/no/such/host")) + resp, err := client.Do(newRequest(t, "https://179.179.179.179/no/such/host")) if resp != nil { resp.Body.Close() } @@ -53,7 +53,7 @@ func TestShouldRetryError_timeout(t *testing.T) { func TestShouldRetryError_cancelled(t *testing.T) { tr := &http.Transport{} defer tr.CloseIdleConnections() - cli := http.Client{ + client := http.Client{ Transport: tr, } @@ -82,7 +82,7 @@ func TestShouldRetryError_cancelled(t *testing.T) { close(ch) // request is cancelled before anything }() - resp, err := cli.Do(r) + resp, err := client.Do(r) if resp != nil { resp.Body.Close() } @@ -131,10 +131,7 @@ func debugerr(t *testing.T, err error) { t.Logf("%s is a temporary error: %t", err, err.Temporary()) return case *url.Error: - // we should be before 1.5 - // that's our case ! - t.Logf("err: %s", err) - t.Logf("err: %#v", err.Err) + t.Logf("err: %s, nested err: %#v", err, err.Err) if operr, ok := err.Err.(*net.OpError); ok { t.Logf("operr: %#v", operr) } @@ -144,3 +141,62 @@ func debugerr(t *testing.T, err error) { return } } + +func TestRequest_retryCustomCodes(t *testing.T) { + cases := map[string]struct { + Code string + RetryErrorCodes []string + ThrottleErrorCodes []string + Retryable bool + Throttle bool + }{ + "retry code": { + Code: "RetryMePlease", + RetryErrorCodes: []string{ + "RetryMePlease", + "SomeOtherError", + }, + Retryable: true, + }, + "throttle code": { + Code: "AThrottleableError", + RetryErrorCodes: []string{ + "RetryMePlease", + "SomeOtherError", + }, + ThrottleErrorCodes: []string{ + "AThrottleableError", + "SomeOtherError", + }, + Throttle: true, + }, + "unknown code": { + Code: "UnknownCode", + RetryErrorCodes: []string{ + "RetryMePlease", + "SomeOtherError", + }, + Retryable: false, + }, + } + + for name, c := range cases { + req := Request{ + HTTPRequest: &http.Request{}, + HTTPResponse: &http.Response{}, + Error: awserr.New(c.Code, "some error", nil), + RetryErrorCodes: c.RetryErrorCodes, + ThrottleErrorCodes: c.ThrottleErrorCodes, + } + + retryable := req.IsErrorRetryable() + if e, a := c.Retryable, retryable; e != a { + t.Errorf("%s, expect %v retryable, got %v", name, e, a) + } + + throttle := req.IsErrorThrottle() + if e, a := c.Throttle, throttle; e != a { + t.Errorf("%s, expect %v throttle, got %v", name, e, a) + } + } +} diff --git a/aws/request/request_test.go b/aws/request/request_test.go index a7a229baade..477c761ea72 100644 --- a/aws/request/request_test.go +++ b/aws/request/request_test.go @@ -181,13 +181,18 @@ func TestRequestRecoverRetry4xxRetryable(t *testing.T) { // test that retries don't occur for 4xx status codes with a response type that can't be retried func TestRequest4xxUnretryable(t *testing.T) { - s := awstesting.NewClient(aws.NewConfig().WithMaxRetries(10)) + s := awstesting.NewClient(&aws.Config{ + MaxRetries: aws.Int(1), + }) s.Handlers.Validate.Clear() s.Handlers.Unmarshal.PushBack(unmarshal) s.Handlers.UnmarshalError.PushBack(unmarshalError) s.Handlers.Send.Clear() // mock sending s.Handlers.Send.PushBack(func(r *request.Request) { - r.HTTPResponse = &http.Response{StatusCode: 401, Body: body(`{"__type":"SignatureDoesNotMatch","message":"Signature does not match."}`)} + r.HTTPResponse = &http.Response{ + StatusCode: 401, + Body: body(`{"__type":"SignatureDoesNotMatch","message":"Signature does not match."}`), + } }) out := &testData{} r := s.NewRequest(&request.Operation{Name: "Operation"}, nil, out) @@ -580,7 +585,7 @@ func TestIsSerializationErrorRetryable(t *testing.T) { Error: c.err, } if r.IsErrorRetryable() != c.expected { - t.Errorf("Case %d: Expected %v, but received %v", i+1, c.expected, !c.expected) + t.Errorf("Case %d: Expected %v, but received %v", i, c.expected, !c.expected) } } } @@ -1124,7 +1129,6 @@ func TestRequestBodySeekFails(t *testing.T) { if err == nil { t.Fatal("expect error, but got none") } - t.Log("Error:", err) aerr := err.(awserr.Error) if e, a := request.ErrCodeSerialization, aerr.Code(); e != a { diff --git a/aws/request/retryer.go b/aws/request/retryer.go index d0aa54c6d10..ede5341bc2e 100644 --- a/aws/request/retryer.go +++ b/aws/request/retryer.go @@ -1,23 +1,41 @@ package request import ( + "net" + "net/url" + "strings" "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/awserr" ) -// Retryer is an interface to control retry logic for a given service. -// The default implementation used by most services is the client.DefaultRetryer -// structure, which contains basic retry logic using exponential backoff. +// Retryer provides the interface drive the SDK's request retry behavior. The +// Retryer implementation is responsible for implementing exponential backoff, +// and determine if a request API error should be retried. +// +// client.DefaultRetryer is the SDK's default implementation of the Retryer. It +// uses the which uses the Request.IsErrorRetryable and Request.IsErrorThrottle +// methods to determine if the request is retried. type Retryer interface { + // RetryRules return the retry delay that should be used by the SDK before + // making another request attempt for the failed request. RetryRules(*Request) time.Duration + + // ShouldRetry returns if the failed request is retryable. + // + // Implementations may consider request attempt count when determining if a + // request is retryable, but the SDK will use MaxRetries to limit the + // number of attempts a request are made. ShouldRetry(*Request) bool + + // MaxRetries is the number of times a request may be retried before + // failing. MaxRetries() int } -// WithRetryer sets a config Retryer value to the given Config returning it -// for chaining. +// WithRetryer sets a Retryer value to the given Config returning the Config +// value for chaining. func WithRetryer(cfg *aws.Config, retryer Retryer) *aws.Config { cfg.Retryer = retryer return cfg @@ -108,32 +126,87 @@ func isNestedErrorRetryable(parentErr awserr.Error) bool { // IsErrorRetryable returns whether the error is retryable, based on its Code. // Returns false if error is nil. func IsErrorRetryable(err error) bool { - if err != nil { - if aerr, ok := err.(awserr.Error); ok { - return isCodeRetryable(aerr.Code()) || isNestedErrorRetryable(aerr) + return shouldRetryError(err) +} + +type temporary interface { + Temporary() bool +} + +func shouldRetryError(origErr error) bool { + switch err := origErr.(type) { + case awserr.Error: + if err.Code() == CanceledErrorCode { + return false + } + if isNestedErrorRetryable(err) { + return true + } + + origErr := err.OrigErr() + var shouldRetry bool + if origErr != nil { + shouldRetry := shouldRetryError(origErr) + if err.Code() == "RequestError" && !shouldRetry { + return false + } } + if isCodeRetryable(err.Code()) { + return true + } + return shouldRetry + + case *url.Error: + if strings.Contains(err.Error(), "connection refused") { + // Refused connections should be retried as the service may not yet + // be running on the port. Go TCP dial considers refused + // connections as not temporary. + return true + } + // *url.Error only implements Temporary after golang 1.6 but since + // url.Error only wraps the error: + return shouldRetryError(err.Err) + + case temporary: + if netErr, ok := err.(*net.OpError); ok && netErr.Op == "dial" { + return true + } + // If the error is temporary, we want to allow continuation of the + // retry process + return err.Temporary() || isErrConnectionReset(origErr) + + case nil: + // `awserr.Error.OrigErr()` can be nil, meaning there was an error but + // because we don't know the cause, it is marked as retryable. See + // TestRequest4xxUnretryable for an example. + return true + + default: + switch err.Error() { + case "net/http: request canceled", + "net/http: request canceled while waiting for connection": + // known 1.5 error case when an http request is cancelled + return false + } + // here we don't know the error; so we allow a retry. + return true } - return false } // IsErrorThrottle returns whether the error is to be throttled based on its code. // Returns false if error is nil. func IsErrorThrottle(err error) bool { - if err != nil { - if aerr, ok := err.(awserr.Error); ok { - return isCodeThrottle(aerr.Code()) - } + if aerr, ok := err.(awserr.Error); ok && aerr != nil { + return isCodeThrottle(aerr.Code()) } return false } -// IsErrorExpiredCreds returns whether the error code is a credential expiry error. -// Returns false if error is nil. +// IsErrorExpiredCreds returns whether the error code is a credential expiry +// error. Returns false if error is nil. func IsErrorExpiredCreds(err error) bool { - if err != nil { - if aerr, ok := err.(awserr.Error); ok { - return isCodeExpiredCreds(aerr.Code()) - } + if aerr, ok := err.(awserr.Error); ok && aerr != nil { + return isCodeExpiredCreds(aerr.Code()) } return false } @@ -143,17 +216,47 @@ func IsErrorExpiredCreds(err error) bool { // // Alias for the utility function IsErrorRetryable func (r *Request) IsErrorRetryable() bool { + if r.Error == nil { + return false + } + if isErrCode(r.Error, r.RetryErrorCodes) { + return true + } + return IsErrorRetryable(r.Error) } -// IsErrorThrottle returns whether the error is to be throttled based on its code. -// Returns false if the request has no Error set +// IsErrorThrottle returns whether the error is to be throttled based on its +// code. Returns false if the request has no Error set. // // Alias for the utility function IsErrorThrottle func (r *Request) IsErrorThrottle() bool { + if isErrCode(r.Error, r.ThrottleErrorCodes) { + return true + } + + if r.HTTPResponse != nil { + switch r.HTTPResponse.StatusCode { + case 429, 502, 503, 504: + return true + } + } + return IsErrorThrottle(r.Error) } +func isErrCode(err error, codes []string) bool { + if aerr, ok := err.(awserr.Error); ok { + for _, code := range codes { + if code == aerr.Code() { + return true + } + } + } + + return false +} + // IsErrorExpired returns whether the error code is a credential expiry error. // Returns false if the request has no Error set. //