From b9e3e12ed7f9a382dd372b23c05e6d4e8093d393 Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Tue, 17 Mar 2020 11:43:19 -0400 Subject: [PATCH] tests: add a check for failing service checks Add tests to check for failing or missing service checks in consul update. --- client/allocrunner/health_hook_test.go | 179 ++++++++++++++++++++++++- 1 file changed, 177 insertions(+), 2 deletions(-) diff --git a/client/allocrunner/health_hook_test.go b/client/allocrunner/health_hook_test.go index e4e3cfd2cf3..454ea52b915 100644 --- a/client/allocrunner/health_hook_test.go +++ b/client/allocrunner/health_hook_test.go @@ -219,9 +219,9 @@ func TestHealthHook_Postrun(t *testing.T) { require.NoError(h.Postrun()) } -// TestHealthHook_SetHealth asserts SetHealth is called when health status is +// TestHealthHook_SetHealth_healthy asserts SetHealth is called when health status is // set. Uses task state and health checks. -func TestHealthHook_SetHealth(t *testing.T) { +func TestHealthHook_SetHealth_healthy(t *testing.T) { t.Parallel() require := require.New(t) @@ -300,6 +300,181 @@ func TestHealthHook_SetHealth(t *testing.T) { require.NoError(h.Postrun()) } +// TestHealthHook_SetHealth_unhealthy asserts SetHealth notices unhealthy allocs +func TestHealthHook_SetHealth_unhealthy(t *testing.T) { + t.Parallel() + require := require.New(t) + + alloc := mock.Alloc() + alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up + task := alloc.Job.TaskGroups[0].Tasks[0] + + newCheck := task.Services[0].Checks[0].Copy() + newCheck.Name = "failing-check" + task.Services[0].Checks = append(task.Services[0].Checks, newCheck) + + // Synthesize running alloc and tasks + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.TaskStates = map[string]*structs.TaskState{ + task.Name: { + State: structs.TaskStateRunning, + StartedAt: time.Now(), + }, + } + + // Make Consul response + checkHealthy := &consulapi.AgentCheck{ + Name: task.Services[0].Checks[0].Name, + Status: 
consulapi.HealthPassing, + } + checksUnhealthy := &consulapi.AgentCheck{ + Name :task.Services[0].Checks[1].Name, + Status: consulapi.HealthCritical, + } + taskRegs := map[string]*agentconsul.ServiceRegistrations{ + task.Name: { + Services: map[string]*agentconsul.ServiceRegistration{ + task.Services[0].Name: { + Service: &consulapi.AgentService{ + ID: "foo", + Service: task.Services[0].Name, + }, + Checks: []*consulapi.AgentCheck{checkHealthy, checksUnhealthy}, + }, + }, + }, + } + + logger := testlog.HCLogger(t) + b := cstructs.NewAllocBroadcaster(logger) + defer b.Close() + + // Don't reply on the first call + called := false + consul := consul.NewMockConsulServiceClient(t, logger) + consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) { + if !called { + called = true + return nil, nil + } + + reg := &agentconsul.AllocRegistration{ + Tasks: taskRegs, + } + + return reg, nil + } + + hs := newMockHealthSetter() + + h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook) + + // Prerun + require.NoError(h.Prerun()) + + // Wait to ensure we don't get a healthy status + select { + case <-time.After(5 * time.Second): + // great, no healthy status + case health := <-hs.healthCh: + require.False(health.healthy) + + // Any reported status must be unhealthy and carry an event for this task + ev := health.taskEvents[task.Name] + require.NotNilf(ev, "%#v", health.taskEvents) + } + + // Postrun + require.NoError(h.Postrun()) +} + +// TestHealthHook_SetHealth_missingchecks asserts that no healthy status is +// reported while one of the alloc's checks is missing from the Consul response +func TestHealthHook_SetHealth_missingchecks(t *testing.T) { + t.Parallel() + require := require.New(t) + + alloc := mock.Alloc() + alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up + task := alloc.Job.TaskGroups[0].Tasks[0] + + newCheck := task.Services[0].Checks[0].Copy() + newCheck.Name = "failing-check" + task.Services[0].Checks = append(task.Services[0].Checks, newCheck) + + //
Synthesize running alloc and tasks + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.TaskStates = map[string]*structs.TaskState{ + task.Name: { + State: structs.TaskStateRunning, + StartedAt: time.Now(), + }, + } + + // Make Consul response + checkHealthy := &consulapi.AgentCheck{ + Name: task.Services[0].Checks[0].Name, + Status: consulapi.HealthPassing, + } + taskRegs := map[string]*agentconsul.ServiceRegistrations{ + task.Name: { + Services: map[string]*agentconsul.ServiceRegistration{ + task.Services[0].Name: { + Service: &consulapi.AgentService{ + ID: "foo", + Service: task.Services[0].Name, + }, + // note: the second check is deliberately left out of the response + Checks: []*consulapi.AgentCheck{checkHealthy }, + }, + }, + }, + } + + logger := testlog.HCLogger(t) + b := cstructs.NewAllocBroadcaster(logger) + defer b.Close() + + // Don't reply on the first call + called := false + consul := consul.NewMockConsulServiceClient(t, logger) + consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) { + if !called { + called = true + return nil, nil + } + + reg := &agentconsul.AllocRegistration{ + Tasks: taskRegs, + } + + return reg, nil + } + + hs := newMockHealthSetter() + + h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook) + + // Prerun + require.NoError(h.Prerun()) + + // Wait to ensure we don't get a healthy status + select { + case <-time.After(5 * time.Second): + // great, no healthy status + case health := <-hs.healthCh: + require.False(health.healthy) + + // Any reported status must be unhealthy and carry an event for this task + ev := health.taskEvents[task.Name] + require.NotNilf(ev, "%#v", health.taskEvents) + } + + // Postrun + require.NoError(h.Postrun()) +} + + // TestHealthHook_SystemNoop asserts that system jobs return the noop tracker. func TestHealthHook_SystemNoop(t *testing.T) { t.Parallel()