diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 1c44639830a..90e5b5c85b3 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -148,7 +148,15 @@ func TestAllocRunner_TaskGroup_ShutdownDelay(t *testing.T) { tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name] alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0 - // Create 3 tasks in the task group + // Create a group service + tg := alloc.Job.TaskGroups[0] + tg.Services = []*structs.Service{ + { + Name: "shutdown_service", + }, + } + + // Create two tasks in the group task := alloc.Job.TaskGroups[0].Tasks[0] task.Name = "follower1" task.Driver = "mock_driver" @@ -204,7 +212,7 @@ func TestAllocRunner_TaskGroup_ShutdownDelay(t *testing.T) { upd.Reset() // Stop alloc - now := time.Now() + shutdownInit := time.Now() update := alloc.Copy() update.DesiredStatus = structs.AllocDesiredStatusStop ar.Update(update) @@ -231,9 +239,33 @@ func TestAllocRunner_TaskGroup_ShutdownDelay(t *testing.T) { t.Fatalf("err: %v", err) }) + // Get consul client operations + consulClient := conf.Consul.(*cconsul.MockConsulServiceClient) + consulOpts := consulClient.GetOps() + var groupRemoveOp cconsul.MockConsulOp + for _, op := range consulOpts { + // Grab the first deregistration request + if op.Op == "remove" && op.Name == "group-web" { + groupRemoveOp = op + break + } + } + + // Ensure remove operation is close to shutdown initiation + require.True(t, groupRemoveOp.OccurredAt.Sub(shutdownInit) < 100*time.Millisecond) + last = upd.Last() - require.Greater(t, last.TaskStates["leader"].FinishedAt.UnixNano(), now.Add(shutdownDelay).UnixNano()) - require.Greater(t, last.TaskStates["follower1"].FinishedAt.UnixNano(), now.Add(shutdownDelay).UnixNano()) + minShutdown := shutdownInit.Add(task.ShutdownDelay) + leaderFinished := last.TaskStates["leader"].FinishedAt + followerFinished := last.TaskStates["follower1"].FinishedAt + + // Check that both tasks shut down after min possible shutdown time + require.Greater(t, leaderFinished.UnixNano(), minShutdown.UnixNano()) + require.Greater(t, followerFinished.UnixNano(), minShutdown.UnixNano()) + + // Check that there is at least shutdown_delay between consul + // remove operation and task finished at time + require.True(t, leaderFinished.Sub(groupRemoveOp.OccurredAt) > shutdownDelay) } // TestAllocRunner_TaskLeader_StopTG asserts that when stopping an alloc with a diff --git a/client/allocrunner/groupservice_hook.go b/client/allocrunner/groupservice_hook.go index a02cc9d5b4f..1cd2f10df23 100644 --- a/client/allocrunner/groupservice_hook.go +++ b/client/allocrunner/groupservice_hook.go @@ -139,9 +139,10 @@ func (h *groupServiceHook) PreKill() { h.deregistered = true h.logger.Debug("waiting before removing group service", "shutdown_delay", h.delay) - select { - case <-time.After(h.delay): - } + + // Wait for specified shutdown_delay + // this will block an agent from shutting down + <-time.After(h.delay) } func (h *groupServiceHook) Postrun() error { diff --git a/client/consul/consul_testing.go b/client/consul/consul_testing.go index 6fb92606892..75307eae810 100644 --- a/client/consul/consul_testing.go +++ b/client/consul/consul_testing.go @@ -3,6 +3,7 @@ package consul import ( "fmt" "sync" + "time" log "github.com/hashicorp/go-hclog" @@ -12,9 +13,10 @@ import ( // MockConsulOp represents the register/deregister operations. type MockConsulOp struct { - Op string // add, remove, or update - AllocID string - Name string // task or group name + Op string // add, remove, or update + AllocID string + Name string // task or group name + OccurredAt time.Time } func NewMockConsulOp(op, allocID, name string) MockConsulOp { @@ -25,9 +27,10 @@ func NewMockConsulOp(op, allocID, name string) MockConsulOp { panic(fmt.Errorf("invalid consul op: %s", op)) } return MockConsulOp{ - Op: op, - AllocID: allocID, - Name: name, + Op: op, + AllocID: allocID, + Name: name, + OccurredAt: time.Now(), } }