From 29491ad47a87e2459c50751ecdb16ebeb376be6f Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Sun, 22 Nov 2015 23:47:15 -0800 Subject: [PATCH 1/5] Not restarting if a task exited properly --- client/task_runner.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/client/task_runner.go b/client/task_runner.go index c9720d10a0a..61223c45ec2 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -276,15 +276,17 @@ func (r *TaskRunner) run() { return } + waitEvent := r.waitErrorToEvent(waitRes) // Log whether the task was successful or not. if !waitRes.Successful() { r.logger.Printf("[ERR] client: failed to complete task '%s' for alloc '%s': %v", r.task.Name, r.allocID, waitRes) } else { r.logger.Printf("[INFO] client: completed task '%s' for alloc '%s'", r.task.Name, r.allocID) + r.setState(structs.TaskStateDead, waitEvent) + return } // Check if we should restart. If not mark task as dead and exit. - waitEvent := r.waitErrorToEvent(waitRes) shouldRestart, when := r.restartTracker.nextRestart() if !shouldRestart { r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.allocID) From 27ad0ad2534f02270aa2b2f1ab9c86dc21aa17ba Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 23 Nov 2015 10:56:38 -0800 Subject: [PATCH 2/5] Making the restart tracker aware of the exit codes --- client/restarts.go | 8 ++++---- client/restarts_test.go | 26 ++++++++++++++++++++++---- client/task_runner.go | 6 ++---- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/client/restarts.go b/client/restarts.go index ae940c2f1e1..ebc5dacbaa9 100644 --- a/client/restarts.go +++ b/client/restarts.go @@ -11,7 +11,7 @@ import ( // For Batch jobs, the interval is set to zero value since the takss // will be restarted only upto maxAttempts times type restartTracker interface { - nextRestart() (bool, time.Duration) + nextRestart(exitCode int) (bool, time.Duration) } func newRestartTracker(jobType string, restartPolicy *structs.RestartPolicy) restartTracker { @@ -47,8 +47,8 @@ func (b *batchRestartTracker) increment() { b.count += 1 } -func (b *batchRestartTracker) nextRestart() (bool, time.Duration) { - if b.count < b.maxAttempts { +func (b *batchRestartTracker) nextRestart(exitCode int) (bool, time.Duration) { + if (b.count < b.maxAttempts) && (exitCode > 0) { b.increment() return true, b.delay } @@ -68,7 +68,7 @@ func (s *serviceRestartTracker) increment() { s.count += 1 } -func (s *serviceRestartTracker) nextRestart() (bool, time.Duration) { +func (s *serviceRestartTracker) nextRestart(exitCode int) (bool, time.Duration) { defer s.increment() windowEndTime := s.startTime.Add(s.interval) now := time.Now() diff --git a/client/restarts_test.go b/client/restarts_test.go index 9d5b59bb419..b7cf92a32a6 100644 --- a/client/restarts_test.go +++ b/client/restarts_test.go @@ -13,7 +13,7 @@ func TestTaskRunner_ServiceRestartCounter(t *testing.T) { rt := newRestartTracker(structs.JobTypeService, &structs.RestartPolicy{Attempts: attempts, Interval: interval, Delay: delay}) for i := 0; i < attempts; i++ { - actual, when := rt.nextRestart() + actual, when := rt.nextRestart(127) if !actual { t.Fatalf("should restart returned %v, actual %v", actual, true) } @@ -24,7 +24,7 @@ func TestTaskRunner_ServiceRestartCounter(t *testing.T) { time.Sleep(1 * time.Second) for i := 0; i < 3; i++ { - actual, when := rt.nextRestart() + actual, when := rt.nextRestart(127) if !actual { t.Fail() } @@ -46,7 +46,7 @@ func TestTaskRunner_BatchRestartCounter(t *testing.T) { }, ) for i := 0; i < attempts; i++ { - shouldRestart, when := rt.nextRestart() + shouldRestart, when := rt.nextRestart(127) if !shouldRestart { t.Fatalf("should restart returned %v, actual %v", shouldRestart, true) } @@ -54,8 +54,26 @@ func TestTaskRunner_BatchRestartCounter(t *testing.T) { t.Fatalf("Delay should be %v, actual: %v", delay, when) } } - actual, _ := rt.nextRestart() + actual, _ := rt.nextRestart(1) if actual { t.Fatalf("Expect %v, Actual: %v", false, actual) } } + +func TestTaskRunner_BatchRestartOnSuccess(t *testing.T) { + attempts := 2 + interval := 1 * time.Second + delay := 1 * time.Second + rt := newRestartTracker(structs.JobTypeBatch, + &structs.RestartPolicy{Attempts: attempts, + Interval: interval, + Delay: delay, + }, + ) + shouldRestart, _ := rt.nextRestart(0) + if shouldRestart { + t.Fatalf("should restart returned %v, expected: %v", shouldRestart, false) + t.Fail() + } + +} diff --git a/client/task_runner.go b/client/task_runner.go index 61223c45ec2..fd01b1f9653 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -276,18 +276,16 @@ func (r *TaskRunner) run() { return } - waitEvent := r.waitErrorToEvent(waitRes) // Log whether the task was successful or not. if !waitRes.Successful() { r.logger.Printf("[ERR] client: failed to complete task '%s' for alloc '%s': %v", r.task.Name, r.allocID, waitRes) } else { r.logger.Printf("[INFO] client: completed task '%s' for alloc '%s'", r.task.Name, r.allocID) - r.setState(structs.TaskStateDead, waitEvent) - return } // Check if we should restart. If not mark task as dead and exit. - shouldRestart, when := r.restartTracker.nextRestart() + shouldRestart, when := r.restartTracker.nextRestart(waitRes.ExitCode) + waitEvent := r.waitErrorToEvent(waitRes) if !shouldRestart { r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.allocID) r.setState(structs.TaskStateDead, waitEvent) From b3f6f1aec31475181b2b0937d3dd4a173cd30119 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 23 Nov 2015 10:59:07 -0800 Subject: [PATCH 3/5] removing redundant fail() --- client/restarts_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/client/restarts_test.go b/client/restarts_test.go index b7cf92a32a6..2faca00ff14 100644 --- a/client/restarts_test.go +++ b/client/restarts_test.go @@ -73,7 +73,6 @@ func TestTaskRunner_BatchRestartOnSuccess(t *testing.T) { shouldRestart, _ := rt.nextRestart(0) if shouldRestart { t.Fatalf("should restart returned %v, expected: %v", shouldRestart, false) - t.Fail() } } From b6cb7d0d7bd7c6dbaac9c47353fd875accead64b Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 23 Nov 2015 11:01:41 -0800 Subject: [PATCH 4/5] Updated changelog --- CHANGELOG.md | 1 + client/restarts.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f27f776fb92..ac6535466ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ BUG FIXES: * driver/docker: Support `port_map` for static ports [GH-476] * driver/docker: Pass 0.2.0-style port environment variables to the docker container [GH-476] * client/service discovery: Make Service IDs unique [GH-479] + * client/restart policy: Not restarting Batch Jobs if the exit code is 0 ## 0.2.0 (November 18, 2015) diff --git a/client/restarts.go b/client/restarts.go index ebc5dacbaa9..7f42ad432c7 100644 --- a/client/restarts.go +++ b/client/restarts.go @@ -48,7 +48,7 @@ func (b *batchRestartTracker) increment() { } func (b *batchRestartTracker) nextRestart(exitCode int) (bool, time.Duration) { - if (b.count < b.maxAttempts) && (exitCode > 0) { + if b.count < b.maxAttempts && exitCode > 0 { b.increment() return true, b.delay } From 361d874e85306b46a04362aa30c1157a7ef382d8 Mon Sep 17 00:00:00 2001 From: Diptanu Choudhury Date: Mon, 23 Nov 2015 11:03:29 -0800 Subject: [PATCH 5/5] Added the PR number to changelog entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac6535466ae..d5080a96c37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ BUG FIXES: * driver/docker: Support `port_map` for static ports [GH-476] * driver/docker: Pass 0.2.0-style port environment variables to the docker container [GH-476] * client/service discovery: Make Service IDs unique [GH-479] - * client/restart policy: Not restarting Batch Jobs if the exit code is 0 + * client/restart policy: Not restarting Batch Jobs if the exit code is 0 [GH-491] ## 0.2.0 (November 18, 2015)