Skip to content

Commit

Permalink
retry transient docker errors within function
Browse files Browse the repository at this point in the history
  • Loading branch information
Mahmood Ali committed Sep 13, 2019
1 parent 2f47a6d commit d505168
Showing 1 changed file with 26 additions and 2 deletions.
28 changes: 26 additions & 2 deletions drivers/docker/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ var (
// running operations such as waiting on containers and collect stats
waitClient *docker.Client

dockerTransientErrs = []string{
"Client.Timeout exceeded while awaiting headers",
"EOF",
"API error (500)",
}

// recoverableErrTimeouts returns a recoverable error if the error was due
// to timeouts
recoverableErrTimeouts = func(err error) error {
Expand Down Expand Up @@ -456,6 +462,10 @@ CREATE:
// There is still a very small chance this is possible even with the
// coordinator so retry.
return nil, nstructs.NewRecoverableError(createErr, true)
} else if isDockerTransientError(createErr) && attempted < 5 {
attempted++
time.Sleep(1 * time.Second)
goto CREATE
}

return nil, recoverableErrTimeouts(createErr)
Expand All @@ -474,8 +484,7 @@ START:

d.logger.Debug("failed to start container", "container_id", c.ID, "attempt", attempted+1, "error", startErr)

// If it is a 500 error it is likely we can retry and be successful
if strings.Contains(startErr.Error(), "API error (500)") {
if isDockerTransientError(startErr) {
if attempted < 5 {
attempted++
time.Sleep(1 * time.Second)
Expand Down Expand Up @@ -1458,3 +1467,18 @@ func sliceMergeUlimit(ulimitsRaw map[string]string) ([]docker.ULimit, error) {
func (d *Driver) Shutdown() {
d.signalShutdown()
}

func isDockerTransientError(err error) bool {
if err == nil {
return false
}

errMsg := err.Error()
for _, te := range dockerTransientErrs {
if strings.Contains(errMsg, te) {
return true
}
}

return false
}

0 comments on commit d505168

Please sign in to comment.