Skip to content

Commit

Permalink
Merge pull request #1405 from novilabs/delay-on-startup-failure
Browse files Browse the repository at this point in the history
do not fail for multiple startup failures, delay instead
  • Loading branch information
dadgar authored Jul 12, 2016
2 parents 3ff6f63 + f884324 commit 20b6193
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
12 changes: 9 additions & 3 deletions client/restarts.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,15 @@ func (r *RestartTracker) handleStartError() (string, time.Duration) {
}

if r.count > r.policy.Attempts {
r.reason = fmt.Sprintf("Exceeded allowed attempts %d in interval %v",
r.policy.Attempts, r.policy.Interval)
return structs.TaskNotRestarting, 0
if r.policy.Mode == structs.RestartPolicyModeFail {
r.reason = fmt.Sprintf(
`Exceeded allowed atttempts %d in interval %v and mode is "fail"`,
r.policy.Attempts, r.policy.Interval)
return structs.TaskNotRestarting, 0
} else {
r.reason = ReasonDelay
return structs.TaskRestarting, r.getDelay()
}
}

r.reason = ReasonWithinPolicy
Expand Down
29 changes: 27 additions & 2 deletions client/restarts_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ func TestClient_RestartTracker_ZeroAttempts(t *testing.T) {
}
}

func TestClient_RestartTracker_StartError_Recoverable(t *testing.T) {
func TestClient_RestartTracker_StartError_Recoverable_Fail(t *testing.T) {
t.Parallel()
p := testPolicy(true, structs.RestartPolicyModeDelay)
p := testPolicy(true, structs.RestartPolicyModeFail)
rt := newRestartTracker(p, structs.JobTypeSystem)
recErr := cstructs.NewRecoverableError(fmt.Errorf("foo"), true)
for i := 0; i < p.Attempts; i++ {
Expand All @@ -114,3 +114,28 @@ func TestClient_RestartTracker_StartError_Recoverable(t *testing.T) {
t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskNotRestarting)
}
}

// TestClient_RestartTracker_StartError_Recoverable_Delay exercises a restart
// tracker built with the delay restart mode and feeds it a recoverable start
// error repeatedly. The first policy.Attempts errors must each yield
// TaskRestarting with a wait of roughly policy.Delay; the error after that
// must still yield TaskRestarting, with a wait longer than policy.Delay but
// no longer than policy.Interval.
func TestClient_RestartTracker_StartError_Recoverable_Delay(t *testing.T) {
	t.Parallel()

	policy := testPolicy(true, structs.RestartPolicyModeDelay)
	tracker := newRestartTracker(policy, structs.JobTypeSystem)
	startErr := cstructs.NewRecoverableError(fmt.Errorf("foo"), true)

	// Failures up to the configured number of attempts restart after the
	// policy delay (plus jitter).
	for attempt := 0; attempt < policy.Attempts; attempt++ {
		state, when := tracker.SetStartError(startErr).GetState()
		if state != structs.TaskRestarting {
			t.Fatalf("NextRestart() returned %v, want %v", state, structs.TaskRestarting)
		}
		if !withinJitter(policy.Delay, when) {
			t.Fatalf("NextRestart() returned %v; want %v+jitter", when, policy.Delay)
		}
	}

	// One more failure exceeds the attempts: the task must keep restarting,
	// but only after a longer wait bounded by the policy interval.
	state, when := tracker.SetStartError(startErr).GetState()
	if state != structs.TaskRestarting {
		t.Fatalf("NextRestart() returned %v; want %v", state, structs.TaskRestarting)
	}
	if when <= policy.Delay || when > policy.Interval {
		t.Fatalf("NextRestart() returned %v; want > %v and <= %v", when, policy.Delay, policy.Interval)
	}
}

0 comments on commit 20b6193

Please sign in to comment.