
consul: fix deadlock in check-based restarts #5975

Merged: 3 commits, Jul 18, 2019
26 changes: 20 additions & 6 deletions command/agent/consul/check_watcher.go
@@ -103,20 +103,34 @@ func (c *checkRestart) apply(ctx context.Context, now time.Time, status string)
 		c.logger.Debug("restarting due to unhealthy check")
 
 		// Tell TaskRunner to restart due to failure
-		const failure = true
 		reason := fmt.Sprintf("healthcheck: check %q unhealthy", c.checkName)
 		event := structs.NewTaskEvent(structs.TaskRestartSignal).SetRestartReason(reason)
-		err := c.task.Restart(ctx, event, failure)
-		if err != nil {
-			// Error restarting
-			return false
-		}
+		go asyncRestart(ctx, c.logger, c.task, event)
Contributor:

Question: does it make sense to use a semaphore or the channel-blocking technique used elsewhere, so we don't call task.Restart concurrently, and if we get a spike of Restart applies we only restart once?

Member Author:

Good question!

The checkWatcher.Run loop removes a check after Restart is called, so the same task won't be restarted more than once (until it completes the restart and re-registers the check).

In 0.8, TR.Restart just ticked a chan and so was async without having to create a new goroutine. This seemed like the least risky way of replicating that behavior.

Tasks that fail in a tight loop (check_restart.grace=0 and restart.delay=0) could in theory spin up lots of goroutines, but the goroutines for a single task should rarely if ever overlap, and restarting a task already involves creating many resources more expensive than a goroutine.

That being said, I hate this "fire and forget" pattern, so I'm open to ideas as long as they can't block checkWatcher.Run/checkRestart.apply. (checkWatcher should probably be refactored to separate Watch/Unwatch mutations from check watching, but that seemed way too big a risk for a point release.)

(A sketch of the kind of non-blocking trigger discussed here appears after this file's diff.)

 		return true
 	}
 
 	return false
 }

+// asyncRestart mimics the pre-0.9 TaskRunner.Restart behavior and is intended
+// to be called in a goroutine.
+func asyncRestart(ctx context.Context, logger log.Logger, task TaskRestarter, event *structs.TaskEvent) {
+	// Check watcher restarts are always failures
+	const failure = true
+
+	// Restarting is asynchronous so there's no reason to allow this
+	// goroutine to block indefinitely.
+	ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
+	defer cancel()
+
+	if err := task.Restart(ctx, event, failure); err != nil {
+		// Restart errors are not actionable and only relevant when
+		// debugging allocation lifecycle management.
+		logger.Debug("error restarting task", "error", err,
+			"event_time", event.Time, "event_type", event.Type)
+	}
+}

 // checkWatchUpdates add or remove checks from the watcher
 type checkWatchUpdate struct {
 	checkID string
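As context for the discussion above about the pre-0.9 "ticked a chan" behavior and the reviewer's semaphore suggestion, the sketch below shows one generic Go shape for a non-blocking, coalescing restart trigger. It is illustrative only and is not Nomad's implementation: the restartTrigger type, its methods, and the doRestart callback are invented for this example.

package main

import (
	"context"
	"fmt"
	"time"
)

// restartTrigger coalesces restart requests: a buffered channel of size one
// acts as a semaphore, so a burst of triggers while a restart is already
// pending collapses into a single restart.
type restartTrigger struct {
	ch chan struct{}
}

func newRestartTrigger() *restartTrigger {
	return &restartTrigger{ch: make(chan struct{}, 1)}
}

// Trigger never blocks: if a restart is already pending, the extra request
// is dropped.
func (r *restartTrigger) Trigger() {
	select {
	case r.ch <- struct{}{}:
	default:
	}
}

// run performs restarts one at a time as triggers arrive, until ctx is done.
func (r *restartTrigger) run(ctx context.Context, doRestart func(context.Context) error) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-r.ch:
			if err := doRestart(ctx); err != nil {
				fmt.Println("restart error:", err)
			}
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	t := newRestartTrigger()
	go t.run(ctx, func(ctx context.Context) error {
		fmt.Println("restarting task")
		return nil
	})

	// A burst of triggers results in at most one pending restart.
	t.Trigger()
	t.Trigger()
	t.Trigger()

	<-ctx.Done()
}

With this shape, Trigger can be called from a hot loop such as checkWatcher.Run without ever blocking, and a spike of unhealthy-check events collapses into at most one pending restart.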
23 changes: 23 additions & 0 deletions command/agent/consul/check_watcher_test.go
@@ -9,6 +9,7 @@ import (
"github.com/hashicorp/consul/api"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/require"
)

 // checkRestartRecord is used by a testFakeCtx to record when restarts occur
@@ -194,6 +195,28 @@ func TestCheckWatcher_Healthy(t *testing.T) {
 	}
 }

+// TestCheckWatcher_Unhealthy asserts unhealthy tasks are restarted exactly once.
+func TestCheckWatcher_Unhealthy(t *testing.T) {
Contributor:

I'm missing some context for this test - does it trigger the deadlock issue here? Is it a relatively easy thing to test for?

Member Author (@schmichael, Jul 18, 2019):

No, this test just asserts checks are only restarted once.

I added a new test for the deadlock in 1763672 and confirmed it does cause the deadlock before my changes.

(A simplified sketch of the watcher-loop behavior behind the restart-exactly-once guarantee appears at the end of this section.)

+	t.Parallel()
+
+	fakeAPI, cw := testWatcherSetup(t)
+
+	check1 := testCheck()
+	restarter1 := newFakeCheckRestarter(cw, "testalloc1", "testtask1", "testcheck1", check1)
+	cw.Watch("testalloc1", "testtask1", "testcheck1", check1, restarter1)
+
+	// Check has always been failing
+	fakeAPI.add("testcheck1", "critical", time.Time{})
+
+	// Run
+	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
+	defer cancel()
+	cw.Run(ctx)
+
+	// Ensure restart was called exactly once
+	require.Len(t, restarter1.restarts, 1)
+}

 // TestCheckWatcher_HealthyWarning asserts checks in warning with
 // ignore_warnings=true do not restart tasks.
 func TestCheckWatcher_HealthyWarning(t *testing.T) {
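To make the "restarted exactly once" behavior asserted by TestCheckWatcher_Unhealthy concrete, here is a simplified sketch of the watcher-loop idea described in the review discussion: once a check triggers a restart, it is dropped from the watched set until the task re-registers it after restarting. This is an illustration of the idea only, not the actual checkWatcher.Run implementation; the watchedCheck type and the run helper are invented for this example.

package main

import (
	"context"
	"fmt"
	"time"
)

// watchedCheck is a stand-in for a watched Consul check. apply reports
// whether the check warrants a restart at this point in time.
type watchedCheck struct {
	name      string
	unhealthy bool
}

func (c *watchedCheck) apply(ctx context.Context, now time.Time) bool {
	if !c.unhealthy {
		return false
	}
	fmt.Printf("restarting task for check %q at %s\n", c.name, now.Format(time.RFC3339))
	// A fire-and-forget restart would be launched here (see asyncRestart above).
	return true
}

// run polls check results and removes a check from the watched set as soon as
// a restart is triggered, so the same check cannot restart the task again
// until it is re-registered after the restart completes.
func run(ctx context.Context, checks map[string]*watchedCheck) {
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case now := <-ticker.C:
			for id, check := range checks {
				if check.apply(ctx, now) {
					// Restart triggered: stop watching this check.
					delete(checks, id)
				}
			}
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer cancel()

	checks := map[string]*watchedCheck{
		"testcheck1": {name: "testcheck1", unhealthy: true},
	}
	run(ctx, checks)
}

Running the sketch prints a single restart line even though the loop keeps polling, because the check is removed from the watched set as soon as the restart is triggered.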