diff --git a/CHANGELOG.md b/CHANGELOG.md index 92a6b6e3d3a..2d7d826002a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ IMPROVEMENTS: BUG FIXES: * core: Fixed a bug where blocking queries would not include the query's maximum wait time when calculating whether it was safe to retry. [[GH-8921](https://github.com/hashicorp/nomad/issues/8921)] + * consul: Fixed a bug to correctly validate task when using script-checks in group-level services [[GH-8952](https://github.com/hashicorp/nomad/issues/8952)] ## 0.12.5 (September 17, 2020) diff --git a/client/allocrunner/taskrunner/script_check_hook.go b/client/allocrunner/taskrunner/script_check_hook.go index f10486ac526..58d891089c9 100644 --- a/client/allocrunner/taskrunner/script_check_hook.go +++ b/client/allocrunner/taskrunner/script_check_hook.go @@ -204,6 +204,10 @@ func (h *scriptCheckHook) newScriptChecks() map[string]*scriptCheck { // for them. The group-level service and any check restart behaviors it // needs are entirely encapsulated within the group service hook which // watches Consul for status changes. + // + // The script check is associated with a group task if the service.task or + // service.check.task matches the task name. The service.check.task takes + // precedence. tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup) interpolatedGroupServices := taskenv.InterpolateServices(h.taskEnv, tg.Services) for _, service := range interpolatedGroupServices { @@ -211,7 +215,7 @@ func (h *scriptCheckHook) newScriptChecks() map[string]*scriptCheck { if check.Type != structs.ServiceCheckScript { continue } - if check.TaskName != h.task.Name { + if !h.associated(h.task.Name, service.TaskName, check.TaskName) { continue } groupTaskName := "group-" + tg.Name @@ -237,6 +241,20 @@ func (h *scriptCheckHook) newScriptChecks() map[string]*scriptCheck { return scriptChecks } +// associated returns true if the script check is associated with the task. This +// would be the case if the check.task is the same as task, or if the service.task +// is the same as the task _and_ check.task is not configured (i.e. the check +// inherits the task of the service). +func (*scriptCheckHook) associated(task, serviceTask, checkTask string) bool { + if checkTask == task { + return true + } + if serviceTask == task && checkTask == "" { + return true + } + return false +} + // heartbeater is the subset of consul agent functionality needed by script // checks to heartbeat type heartbeater interface { @@ -371,7 +389,7 @@ const ( updateTTLBackoffLimit = 3 * time.Second ) -// updateTTL updates the state to Consul, performing an expontential backoff +// updateTTL updates the state to Consul, performing an exponential backoff // in the case where the check isn't registered in Consul to avoid a race between // service registration and the first check. func (s *scriptCheck) updateTTL(ctx context.Context, msg, state string) error { diff --git a/client/allocrunner/taskrunner/script_check_hook_test.go b/client/allocrunner/taskrunner/script_check_hook_test.go index 9fba3a67f66..73ea2050e9b 100644 --- a/client/allocrunner/taskrunner/script_check_hook_test.go +++ b/client/allocrunner/taskrunner/script_check_hook_test.go @@ -286,3 +286,27 @@ func TestScript_TaskEnvInterpolation(t *testing.T) { require.True(t, ok) require.Equal(t, "my-job-backend-check", check.check.Name) } + +func TestScript_associated(t *testing.T) { + t.Run("neither set", func(t *testing.T) { + require.False(t, new(scriptCheckHook).associated("task1", "", "")) + }) + + t.Run("service set", func(t *testing.T) { + require.True(t, new(scriptCheckHook).associated("task1", "task1", "")) + require.False(t, new(scriptCheckHook).associated("task1", "task2", "")) + }) + + t.Run("check set", func(t *testing.T) { + require.True(t, new(scriptCheckHook).associated("task1", "", "task1")) + require.False(t, new(scriptCheckHook).associated("task1", "", "task2")) + }) + + t.Run("both set", func(t *testing.T) { + // ensure check.task takes precedence over service.task + require.True(t, new(scriptCheckHook).associated("task1", "task1", "task1")) + require.False(t, new(scriptCheckHook).associated("task1", "task1", "task2")) + require.True(t, new(scriptCheckHook).associated("task1", "task2", "task1")) + require.False(t, new(scriptCheckHook).associated("task1", "task2", "task2")) + }) +} diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 7ff57ccd06c..c80fa533f27 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -478,6 +478,7 @@ func TestJobEndpoint_Register_ConnectExposeCheck(t *testing.T) { Name: "check2", Type: "script", Command: "/bin/true", + TaskName: "web", Interval: 1 * time.Second, Timeout: 1 * time.Second, }, { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 340f2e36c37..1fcd57f05ce 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -5801,6 +5801,12 @@ func (tg *TaskGroup) Validate(j *Job) error { mErr.Errors = append(mErr.Errors, outer) } + // Validate group service script-checks + if err := tg.validateScriptChecksInGroupServices(); err != nil { + outer := fmt.Errorf("Task group service check validation failed: %v", err) + mErr.Errors = append(mErr.Errors, outer) + } + // Validate the scaling policy if err := tg.validateScalingPolicy(j); err != nil { outer := fmt.Errorf("Task group scaling policy validation failed: %v", err) @@ -5950,6 +5956,26 @@ func (tg *TaskGroup) validateServices() error { return mErr.ErrorOrNil() } +// validateScriptChecksInGroupServices ensures group-level services with script +// checks know what task driver to use. Either the service.task or service.check.task +// parameter must be configured. +func (tg *TaskGroup) validateScriptChecksInGroupServices() error { + var mErr multierror.Error + for _, service := range tg.Services { + if service.TaskName == "" { + for _, check := range service.Checks { + if check.Type == "script" && check.TaskName == "" { + mErr.Errors = append(mErr.Errors, + fmt.Errorf("Service [%s]->%s or Check %s must specify task parameter", + tg.Name, service.Name, check.Name, + )) + } + } + } + } + return mErr.ErrorOrNil() +} + // validateScalingPolicy ensures that the scaling policy has consistent // min and max, not in conflict with the task group count func (tg *TaskGroup) validateScalingPolicy(j *Job) error { diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 9ea2c8636c0..2a0cc00c7e1 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -5584,3 +5584,69 @@ func TestAllocatedSharedResources_Canonicalize(t *testing.T) { }, }, a.Ports) } + +func TestTaskGroup_validateScriptChecksInGroupServices(t *testing.T) { + t.Run("service task not set", func(t *testing.T) { + tg := &TaskGroup{ + Name: "group1", + Services: []*Service{{ + Name: "service1", + TaskName: "", // unset + Checks: []*ServiceCheck{{ + Name: "check1", + Type: "script", + TaskName: "", // unset + }, { + Name: "check2", + Type: "ttl", // not script + }, { + Name: "check3", + Type: "script", + TaskName: "", // unset + }}, + }, { + Name: "service2", + Checks: []*ServiceCheck{{ + Type: "script", + TaskName: "task1", // set + }}, + }, { + Name: "service3", + TaskName: "", // unset + Checks: []*ServiceCheck{{ + Name: "check1", + Type: "script", + TaskName: "", // unset + }}, + }}, + } + + errStr := tg.validateScriptChecksInGroupServices().Error() + require.Contains(t, errStr, "Service [group1]->service1 or Check check1 must specify task parameter") + require.Contains(t, errStr, "Service [group1]->service1 or Check check3 must specify task parameter") + require.Contains(t, errStr, "Service [group1]->service3 or Check check1 must specify task parameter") + }) + + t.Run("service task set", func(t *testing.T) { + tgOK := &TaskGroup{ + Name: "group1", + Services: []*Service{{ + Name: "service1", + TaskName: "task1", + Checks: []*ServiceCheck{{ + Name: "check1", + Type: "script", + }, { + Name: "check2", + Type: "ttl", + }, { + Name: "check3", + Type: "script", + }}, + }}, + } + + mErrOK := tgOK.validateScriptChecksInGroupServices() + require.Nil(t, mErrOK) + }) +} diff --git a/website/pages/docs/job-specification/service.mdx b/website/pages/docs/job-specification/service.mdx index 634f3ef6c96..716e11215d8 100644 --- a/website/pages/docs/job-specification/service.mdx +++ b/website/pages/docs/job-specification/service.mdx @@ -246,7 +246,8 @@ scripts. - `task` `(string: )` - Specifies the task associated with this check. Scripts are executed within the task's environment, and `check_restart` stanzas will apply to the specified task. For `checks` on group - level `services` only. + level `services` only. Inherits the [`service.task`][service_task] value if not + set. - `timeout` `(string: )` - Specifies how long Consul will wait for a health check query to succeed. This is specified using a label suffix like @@ -714,3 +715,4 @@ advertise and check directly since Nomad isn't managing any port assignments. [shutdowndelay]: /docs/job-specification/task#shutdown_delay [killsignal]: /docs/job-specification/task#kill_signal [killtimeout]: /docs/job-specification/task#kill_timeout +[service_task]: /docs/job-specification/service#task-1