From a0a0d1d85dbb255dd1e8fc4da7f0a52d3bfd67a1 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Sun, 10 Sep 2017 17:22:03 -0700 Subject: [PATCH] Add comments and move delay calc to TaskRunner --- client/task_runner.go | 7 ++++--- command/agent/consul/check_watcher.go | 17 ++++++++++++----- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/client/task_runner.go b/client/task_runner.go index e31371026b9..cd7f40b3908 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -1716,10 +1716,11 @@ func (r *TaskRunner) Restart(source, reason string, failure bool) { } } -// RestartDelay returns the value of the delay for this task's restart policy -// for use by the healtcheck watcher. +// RestartDelay returns the *max* value of the delay for this task's restart +// policy for use by the healthcheck watcher. func (r *TaskRunner) RestartDelay() time.Duration { - return r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup).RestartPolicy.Delay + delay := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup).RestartPolicy.Delay + return delay + time.Duration(float64(delay)*jitter) } // Signal will send a signal to the task diff --git a/command/agent/consul/check_watcher.go b/command/agent/consul/check_watcher.go index 6261c7d8015..af19bb01984 100644 --- a/command/agent/consul/check_watcher.go +++ b/command/agent/consul/check_watcher.go @@ -15,10 +15,14 @@ const ( defaultPollFreq = 900 * time.Millisecond ) -type ConsulChecks interface { +// ChecksAPI is the part of the Consul API the checkWatcher requires. +type ChecksAPI interface { + // Checks returns a list of all checks. Checks() (map[string]*api.AgentCheck, error) } +// TaskRestarter allows the checkWatcher to restart tasks and to determine how +// long the grace period should be afterward. 
type TaskRestarter interface { RestartDelay() time.Duration Restart(source, reason string, failure bool) @@ -96,8 +100,8 @@ func (c *checkRestart) update(now time.Time, status string) { const failure = true c.task.Restart("healthcheck", fmt.Sprintf("check %q unhealthy", c.checkName), failure) - // Reset grace time to grace + restart.delay + (restart.delay * 25%) (the max jitter) - c.graceUntil = now.Add(c.grace + c.restartDelay + time.Duration(float64(c.restartDelay)*0.25)) + // Reset grace time to grace + restart.delay + c.graceUntil = now.Add(c.grace + c.restartDelay) c.unhealthyStart = time.Time{} } } @@ -105,10 +109,13 @@ func (c *checkRestart) update(now time.Time, status string) { // checkWatcher watches Consul checks and restarts tasks when they're // unhealthy. type checkWatcher struct { - consul ConsulChecks + consul ChecksAPI + // pollFreq is how often to poll the checks API and defaults to + // defaultPollFreq pollFreq time.Duration + // watchCh is how watches (and removals) are sent to the main watching loop watchCh chan *checkRestart // done is closed when Run has exited @@ -122,7 +129,7 @@ type checkWatcher struct { } // newCheckWatcher creates a new checkWatcher but does not call its Run method. -func newCheckWatcher(logger *log.Logger, consul ConsulChecks) *checkWatcher { +func newCheckWatcher(logger *log.Logger, consul ChecksAPI) *checkWatcher { return &checkWatcher{ consul: consul, pollFreq: defaultPollFreq,