From 7bb0a2336ae7dffafa5dfbdd779f8bdf58727744 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 21 Jul 2016 15:49:54 -0700 Subject: [PATCH] Add killing event and mark task as not running when killed --- api/tasks.go | 2 ++ client/task_runner.go | 10 ++++++++++ command/alloc_status.go | 6 ++++++ nomad/structs/structs.go | 11 +++++++++++ website/source/docs/http/alloc.html.md | 1 + 5 files changed, 30 insertions(+) diff --git a/api/tasks.go b/api/tasks.go index 1ab7a2e88f4..f3a60abd45a 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -207,6 +207,7 @@ const ( TaskFailedValidation = "Failed Validation" TaskStarted = "Started" TaskTerminated = "Terminated" + TaskKilling = "Killing" TaskKilled = "Killed" TaskRestarting = "Restarting" TaskNotRestarting = "Not Restarting" @@ -224,6 +225,7 @@ type TaskEvent struct { ExitCode int Signal int Message string + KillTimeout time.Duration KillError string StartDelay int64 DownloadError string diff --git a/client/task_runner.go b/client/task_runner.go index 4551f4a3e41..85a842d86d1 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -392,6 +392,11 @@ func (r *TaskRunner) run() { r.logger.Printf("[ERR] client: update to task %q failed: %v", r.task.Name, err) } case <-r.destroyCh: + // Mark that we received the kill event + timeout := driver.GetKillTimeout(r.task.KillTimeout, r.config.MaxKillTimeout) + r.setState(structs.TaskStateRunning, + structs.NewTaskEvent(structs.TaskKilling).SetKillTimeout(timeout)) + // Kill the task using an exponential backoff in-case of failures. destroySuccess, err := r.handleDestroy() if !destroySuccess { @@ -404,6 +409,11 @@ func (r *TaskRunner) run() { // Store that the task has been destroyed and any associated error. 
r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled).SetKillError(err)) + + r.runningLock.Lock() + r.running = false + r.runningLock.Unlock() + + return } } diff --git a/command/alloc_status.go b/command/alloc_status.go index 9b88c30b448..35179fca97d 100644 --- a/command/alloc_status.go +++ b/command/alloc_status.go @@ -223,6 +223,12 @@ func (c *AllocStatusCommand) outputTaskStatus(state *api.TaskState) { } else { desc = "Failed to download artifacts" } + case api.TaskKilling: + if event.KillTimeout != 0 { + desc = fmt.Sprintf("Sent interrupt. Waiting %v before force killing", event.KillTimeout) + } else { + desc = "Sent interrupt" + } case api.TaskKilled: if event.KillError != "" { desc = event.KillError diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 8f3b8827256..6e162c297d8 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2100,6 +2100,9 @@ const ( // TaskTerminated indicates that the task was started and exited. TaskTerminated = "Terminated" + // TaskKilling indicates a kill signal has been sent to the task. + TaskKilling = "Killing" + // TaskKilled indicates a user has killed the task. TaskKilled = "Killed" @@ -2136,6 +2139,9 @@ type TaskEvent struct { Signal int // The signal that terminated the task. Message string // A possible message explaining the termination of the task. + // Killing fields + KillTimeout time.Duration + // Task Killed Fields. KillError string // Error killing the task. @@ -2224,6 +2230,11 @@ func (e *TaskEvent) SetValidationError(err error) *TaskEvent { return e } +func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent { + e.KillTimeout = timeout + return e +} + // TaskArtifact is an artifact to download before running the task. 
type TaskArtifact struct { // GetterSource is the source to download an artifact using go-getter diff --git a/website/source/docs/http/alloc.html.md b/website/source/docs/http/alloc.html.md index 066ef995099..1197b72eacc 100644 --- a/website/source/docs/http/alloc.html.md +++ b/website/source/docs/http/alloc.html.md @@ -254,6 +254,7 @@ be specified using the `?region=` query parameter. * `Started` - The task was started; either for the first time or due to a restart. * `Terminated` - The task was started and exited. + * `Killing` - The task has been sent the kill signal. * `Killed` - The task was killed by an user. * `Received` - The task has been pulled by the client at the given timestamp. * `Failed Validation` - The task was invalid and as such it didn't run.