Skip to content

Commit

Permalink
health check account for task lifecycle
Browse files Browse the repository at this point in the history
In service jobs, lifecycles non-sidecar task tweak health logic a bit:
they may terminate successfully without impacting alloc health, but fail
the alloc if they fail.

Sidecars should be treated just like a normal task.
  • Loading branch information
Mahmood Ali committed Mar 22, 2020
1 parent 3132176 commit 1454af7
Showing 1 changed file with 24 additions and 10 deletions.
34 changes: 24 additions & 10 deletions client/allochealth/tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ type Tracker struct {
// not needed
allocStopped chan struct{}

// lifecycleTasks is a set of tasks with lifecycle hook set and may
// terminate without affecting alloc health
lifecycleTasks map[string]bool

// l is used to lock shared fields listed below
l sync.Mutex

Expand Down Expand Up @@ -106,14 +110,17 @@ func NewTracker(parentCtx context.Context, logger hclog.Logger, alloc *structs.A
consulClient: consulClient,
checkLookupInterval: consulCheckLookupInterval,
logger: logger,
lifecycleTasks: map[string]bool{},
}

t.taskHealth = make(map[string]*taskHealthState, len(t.tg.Tasks))
for _, task := range t.tg.Tasks {
t.taskHealth[task.Name] = &taskHealthState{task: task}
}

for _, task := range t.tg.Tasks {
if task.Lifecycle != nil && !task.Lifecycle.Sidecar {
t.lifecycleTasks[task.Name] = true
}

for _, s := range task.Services {
t.consulCheckCount += len(s.Checks)
}
Expand Down Expand Up @@ -263,9 +270,9 @@ func (t *Tracker) watchTaskEvents() {

// Detect if the alloc is unhealthy or if all tasks have started yet
latestStartTime := time.Time{}
for _, state := range alloc.TaskStates {
for taskName, state := range alloc.TaskStates {
// One of the tasks has failed so we can exit watching
if state.Failed || !state.FinishedAt.IsZero() {
if state.Failed || (!state.FinishedAt.IsZero() && !t.lifecycleTasks[taskName]) {
t.setTaskHealth(false, true)
return
}
Expand Down Expand Up @@ -458,13 +465,20 @@ func (t *taskHealthState) event(deadline time.Time, minHealthyTime time.Duration
if t.state.Failed {
return "Unhealthy because of failed task", true
}
if t.state.State != structs.TaskStateRunning {
return "Task not running by deadline", true
}

// We are running so check if we have been running long enough
if t.state.StartedAt.Add(minHealthyTime).After(deadline) {
return fmt.Sprintf("Task not running for min_healthy_time of %v by deadline", minHealthyTime), true
switch t.state.State {
case structs.TaskStatePending:
return "Task not running by deadline", true
case structs.TaskStateDead:
// hook tasks are healthy when dead successfully
if t.task.Lifecycle == nil || t.task.Lifecycle.Sidecar {
return "Unhealthy because of dead task", true
}
case structs.TaskStateRunning:
// We are running so check if we have been running long enough
if t.state.StartedAt.Add(minHealthyTime).After(deadline) {
return fmt.Sprintf("Task not running for min_healthy_time of %v by deadline", minHealthyTime), true
}
}
}

Expand Down

0 comments on commit 1454af7

Please sign in to comment.