Skip to content

Commit

Permalink
connect: deployments should wait for Connect sidecar checks (#19334)
Browse files Browse the repository at this point in the history
When a Connect service is registered with Consul, Nomad includes the nested
`Connect.SidecarService` field that includes health checks for the Envoy
proxy. Because these are not part of the job spec, the alloc health tracker
created by `health_hook` doesn't know to read the value of these checks.

In many circumstances this won't be noticed, but if the Envoy health check
happens to take longer than the `update.min_healthy_time` (perhaps because it's
been set low), it's possible for a deployment to progress too early such that
there will briefly be no healthy instances of the service available in Consul.

Update the Consul service client to find the nested sidecar service in the
service catalog and attach it to the results provided to the tracker. The
tracker can then check the sidecar health checks.

Fixes: #19269
  • Loading branch information
tgross authored Dec 6, 2023
1 parent 340c9eb commit 3c4e200
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .changelog/19334.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
connect: Fixed a bug where deployments would not wait for Connect sidecar task health checks to pass
```
17 changes: 17 additions & 0 deletions client/allochealth/tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,8 @@ func evaluateConsulChecks(services []*structs.Service, registrations *servicereg
}
}

// The sidecar service and checks are created when the service is
// registered, not on job registration, so they won't appear in the jobspec.
if !maps.Equal(expChecks, regChecks) {
return false
}
Expand All @@ -595,6 +597,21 @@ func evaluateConsulChecks(services []*structs.Service, registrations *servicereg
}
}
}

// Check the health of Connect sidecars, so we don't accidentally
// mark an allocation healthy before min_healthy_time. These don't
// currently support on_update.
if service.SidecarService != nil {
for _, check := range service.SidecarChecks {
switch check.Status {
case api.HealthWarning:
return false
case api.HealthCritical:
return false
}
}
}

}
}

Expand Down
36 changes: 36 additions & 0 deletions client/allochealth/tracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1466,6 +1466,42 @@ func TestTracker_evaluateConsulChecks(t *testing.T) {
},
},
},
{
name: "failing sidecar checks only",
exp: false,
tg: &structs.TaskGroup{
Services: []*structs.Service{{
Name: "group-s1",
Checks: []*structs.ServiceCheck{
{Name: "c1"},
},
}},
},
registrations: &serviceregistration.AllocRegistration{
Tasks: map[string]*serviceregistration.ServiceRegistrations{
"group": {
Services: map[string]*serviceregistration.ServiceRegistration{
"abc123": {
ServiceID: "abc123",
Checks: []*consulapi.AgentCheck{
{
Name: "c1",
Status: consulapi.HealthPassing,
},
},
SidecarService: &consulapi.AgentService{},
SidecarChecks: []*consulapi.AgentCheck{
{
Name: "sidecar-check",
Status: consulapi.HealthCritical,
},
},
},
},
},
},
},
},
}

for _, tc := range cases {
Expand Down
6 changes: 6 additions & 0 deletions client/serviceregistration/service_registration.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,12 @@ type ServiceRegistration struct {

// Checks is the status of the registered checks.
Checks []*api.AgentCheck

// SidecarService is the AgentService registered in Consul for any Connect sidecar
SidecarService *api.AgentService

// SidecarChecks is the status of the registered checks for any Connect sidecar
SidecarChecks []*api.AgentCheck
}

func (s *ServiceRegistration) copy() *ServiceRegistration {
Expand Down
9 changes: 9 additions & 0 deletions command/agent/consul/service_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -1760,6 +1760,15 @@ func (c *ServiceClient) AllocRegistrations(allocID string) (*serviceregistration
sreg.Checks = append(sreg.Checks, check)
}
}

if sidecarService := getNomadSidecar(serviceID, services); sidecarService != nil {
sreg.SidecarService = sidecarService
for _, check := range checks {
if check.ServiceID == sidecarService.ID {
sreg.SidecarChecks = append(sreg.SidecarChecks, check)
}
}
}
}
}

Expand Down

0 comments on commit 3c4e200

Please sign in to comment.