Skip to content

Commit

Permalink
Merge pull request #9033 from pierreca/verify-remove-checks
Browse files Browse the repository at this point in the history
Do not double-remove checks removed by Consul
  • Loading branch information
shoenig authored Oct 6, 2020
2 parents 3b4a018 + 6c5d2c3 commit d538559
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 8 deletions.
6 changes: 6 additions & 0 deletions command/agent/consul/catalog_testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,12 @@ func (c *MockAgent) ServiceDeregister(serviceID string) error {
defer c.mu.Unlock()
c.hits++
delete(c.services, serviceID)
for k, v := range c.checks {
if v.ServiceID == serviceID {
delete(c.checks, k)
delete(c.checkTTLs, k)
}
}
return nil
}

Expand Down
35 changes: 27 additions & 8 deletions command/agent/consul/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -591,12 +591,6 @@ func (c *ServiceClient) sync(reason syncReason) error {
return fmt.Errorf("error querying Consul services: %v", err)
}

consulChecks, err := c.client.Checks()
if err != nil {
metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
return fmt.Errorf("error querying Consul checks: %v", err)
}

inProbation := time.Now().Before(c.deregisterProbationExpiry)

// Remove Nomad services in Consul but unknown locally
Expand Down Expand Up @@ -656,6 +650,12 @@ func (c *ServiceClient) sync(reason syncReason) error {

}

consulChecks, err := c.client.Checks()
if err != nil {
metrics.IncrCounter([]string{"client", "consul", "sync_failure"}, 1)
return fmt.Errorf("error querying Consul checks: %v", err)
}

// Remove Nomad checks in Consul but unknown locally
for id, check := range consulChecks {
if _, ok := c.checks[id]; ok {
Expand Down Expand Up @@ -1227,9 +1227,28 @@ func (c *ServiceClient) Shutdown() error {
c.logger.Error("failed deregistering agent service", "service_id", id, "error", err)
}
}

remainingChecks, err := c.client.Checks()
if err != nil {
c.logger.Error("failed listing remaining checks after deregistering services", "error", err)
}

checkRemains := func(id string) bool {
for _, c := range remainingChecks {
if c.CheckID == id {
return true
}
}
return false
}

for id := range c.agentChecks {
if err := c.client.CheckDeregister(id); err != nil {
c.logger.Error("failed deregistering agent check", "check_id", id, "error", err)
// if we couldn't populate remainingChecks it is unlikely that CheckDeregister will work, but try anyway
// if we could list the remaining checks, verify that the check we store still exists before removing it.
if remainingChecks == nil || checkRemains(id) {
if err := c.client.CheckDeregister(id); err != nil {
c.logger.Error("failed deregistering agent check", "check_id", id, "error", err)
}
}
}

Expand Down

1 comment on commit d538559

@vercel
Copy link

@vercel vercel bot commented on d538559 Oct 6, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.