Skip to content

Commit

Permalink
Merge pull request #8977 from hashicorp/b-script-check-task
Browse files Browse the repository at this point in the history
consul: fix validation of task in group-level script-checks
  • Loading branch information
shoenig authored Sep 29, 2020
2 parents c9d5048 + 23604e0 commit 095d409
Show file tree
Hide file tree
Showing 7 changed files with 141 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ IMPROVEMENTS:
BUG FIXES:

* core: Fixed a bug where blocking queries would not include the query's maximum wait time when calculating whether it was safe to retry. [[GH-8921](https://github.com/hashicorp/nomad/issues/8921)]
* consul: Fixed a bug to correctly validate task when using script-checks in group-level services [[GH-8952](https://github.com/hashicorp/nomad/issues/8952)]

## 0.12.5 (September 17, 2020)

Expand Down
22 changes: 20 additions & 2 deletions client/allocrunner/taskrunner/script_check_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,14 +204,18 @@ func (h *scriptCheckHook) newScriptChecks() map[string]*scriptCheck {
// for them. The group-level service and any check restart behaviors it
// needs are entirely encapsulated within the group service hook which
// watches Consul for status changes.
//
// The script check is associated with a group task if the service.task or
// service.check.task matches the task name. The service.check.task takes
// precedence.
tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
interpolatedGroupServices := taskenv.InterpolateServices(h.taskEnv, tg.Services)
for _, service := range interpolatedGroupServices {
for _, check := range service.Checks {
if check.Type != structs.ServiceCheckScript {
continue
}
if check.TaskName != h.task.Name {
if !h.associated(h.task.Name, service.TaskName, check.TaskName) {
continue
}
groupTaskName := "group-" + tg.Name
Expand All @@ -237,6 +241,20 @@ func (h *scriptCheckHook) newScriptChecks() map[string]*scriptCheck {
return scriptChecks
}

// associated returns true if the script check is associated with the task. This
// would be the case if the check.task is the same as task, or if the service.task
// is the same as the task _and_ check.task is not configured (i.e. the check
// inherits the task of the service).
func (*scriptCheckHook) associated(task, serviceTask, checkTask string) bool {
if checkTask == task {
return true
}
if serviceTask == task && checkTask == "" {
return true
}
return false
}

// heartbeater is the subset of consul agent functionality needed by script
// checks to heartbeat
type heartbeater interface {
Expand Down Expand Up @@ -371,7 +389,7 @@ const (
updateTTLBackoffLimit = 3 * time.Second
)

// updateTTL updates the state to Consul, performing an expontential backoff
// updateTTL updates the state to Consul, performing an exponential backoff
// in the case where the check isn't registered in Consul to avoid a race between
// service registration and the first check.
func (s *scriptCheck) updateTTL(ctx context.Context, msg, state string) error {
Expand Down
24 changes: 24 additions & 0 deletions client/allocrunner/taskrunner/script_check_hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,27 @@ func TestScript_TaskEnvInterpolation(t *testing.T) {
require.True(t, ok)
require.Equal(t, "my-job-backend-check", check.check.Name)
}

func TestScript_associated(t *testing.T) {
t.Run("neither set", func(t *testing.T) {
require.False(t, new(scriptCheckHook).associated("task1", "", ""))
})

t.Run("service set", func(t *testing.T) {
require.True(t, new(scriptCheckHook).associated("task1", "task1", ""))
require.False(t, new(scriptCheckHook).associated("task1", "task2", ""))
})

t.Run("check set", func(t *testing.T) {
require.True(t, new(scriptCheckHook).associated("task1", "", "task1"))
require.False(t, new(scriptCheckHook).associated("task1", "", "task2"))
})

t.Run("both set", func(t *testing.T) {
// ensure check.task takes precedence over service.task
require.True(t, new(scriptCheckHook).associated("task1", "task1", "task1"))
require.False(t, new(scriptCheckHook).associated("task1", "task1", "task2"))
require.True(t, new(scriptCheckHook).associated("task1", "task2", "task1"))
require.False(t, new(scriptCheckHook).associated("task1", "task2", "task2"))
})
}
1 change: 1 addition & 0 deletions nomad/job_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ func TestJobEndpoint_Register_ConnectExposeCheck(t *testing.T) {
Name: "check2",
Type: "script",
Command: "/bin/true",
TaskName: "web",
Interval: 1 * time.Second,
Timeout: 1 * time.Second,
}, {
Expand Down
26 changes: 26 additions & 0 deletions nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -5801,6 +5801,12 @@ func (tg *TaskGroup) Validate(j *Job) error {
mErr.Errors = append(mErr.Errors, outer)
}

// Validate group service script-checks
if err := tg.validateScriptChecksInGroupServices(); err != nil {
outer := fmt.Errorf("Task group service check validation failed: %v", err)
mErr.Errors = append(mErr.Errors, outer)
}

// Validate the scaling policy
if err := tg.validateScalingPolicy(j); err != nil {
outer := fmt.Errorf("Task group scaling policy validation failed: %v", err)
Expand Down Expand Up @@ -5950,6 +5956,26 @@ func (tg *TaskGroup) validateServices() error {
return mErr.ErrorOrNil()
}

// validateScriptChecksInGroupServices ensures group-level services with script
// checks know what task driver to use. Either the service.task or service.check.task
// parameter must be configured.
func (tg *TaskGroup) validateScriptChecksInGroupServices() error {
var mErr multierror.Error
for _, service := range tg.Services {
if service.TaskName == "" {
for _, check := range service.Checks {
if check.Type == "script" && check.TaskName == "" {
mErr.Errors = append(mErr.Errors,
fmt.Errorf("Service [%s]->%s or Check %s must specify task parameter",
tg.Name, service.Name, check.Name,
))
}
}
}
}
return mErr.ErrorOrNil()
}

// validateScalingPolicy ensures that the scaling policy has consistent
// min and max, not in conflict with the task group count
func (tg *TaskGroup) validateScalingPolicy(j *Job) error {
Expand Down
66 changes: 66 additions & 0 deletions nomad/structs/structs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5584,3 +5584,69 @@ func TestAllocatedSharedResources_Canonicalize(t *testing.T) {
},
}, a.Ports)
}

func TestTaskGroup_validateScriptChecksInGroupServices(t *testing.T) {
t.Run("service task not set", func(t *testing.T) {
tg := &TaskGroup{
Name: "group1",
Services: []*Service{{
Name: "service1",
TaskName: "", // unset
Checks: []*ServiceCheck{{
Name: "check1",
Type: "script",
TaskName: "", // unset
}, {
Name: "check2",
Type: "ttl", // not script
}, {
Name: "check3",
Type: "script",
TaskName: "", // unset
}},
}, {
Name: "service2",
Checks: []*ServiceCheck{{
Type: "script",
TaskName: "task1", // set
}},
}, {
Name: "service3",
TaskName: "", // unset
Checks: []*ServiceCheck{{
Name: "check1",
Type: "script",
TaskName: "", // unset
}},
}},
}

errStr := tg.validateScriptChecksInGroupServices().Error()
require.Contains(t, errStr, "Service [group1]->service1 or Check check1 must specify task parameter")
require.Contains(t, errStr, "Service [group1]->service1 or Check check3 must specify task parameter")
require.Contains(t, errStr, "Service [group1]->service3 or Check check1 must specify task parameter")
})

t.Run("service task set", func(t *testing.T) {
tgOK := &TaskGroup{
Name: "group1",
Services: []*Service{{
Name: "service1",
TaskName: "task1",
Checks: []*ServiceCheck{{
Name: "check1",
Type: "script",
}, {
Name: "check2",
Type: "ttl",
}, {
Name: "check3",
Type: "script",
}},
}},
}

mErrOK := tgOK.validateScriptChecksInGroupServices()
require.Nil(t, mErrOK)
})
}
4 changes: 3 additions & 1 deletion website/pages/docs/job-specification/service.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,8 @@ scripts.
- `task` `(string: <required>)` - Specifies the task associated with this
check. Scripts are executed within the task's environment, and
`check_restart` stanzas will apply to the specified task. For `checks` on group
level `services` only.
level `services` only. Inherits the [`service.task`][service_task] value if not
set.

- `timeout` `(string: <required>)` - Specifies how long Consul will wait for a
health check query to succeed. This is specified using a label suffix like
Expand Down Expand Up @@ -714,3 +715,4 @@ advertise and check directly since Nomad isn't managing any port assignments.
[shutdowndelay]: /docs/job-specification/task#shutdown_delay
[killsignal]: /docs/job-specification/task#kill_signal
[killtimeout]: /docs/job-specification/task#kill_timeout
[service_task]: /docs/job-specification/service#task-1

0 comments on commit 095d409

Please sign in to comment.