diff --git a/client/client.go b/client/client.go index 25487a031b9..03e1c0045c5 100644 --- a/client/client.go +++ b/client/client.go @@ -1003,12 +1003,9 @@ func (c *Client) updateNodeFromFingerprint(response *cstructs.FingerprintRespons for name, newVal := range response.Drivers { oldVal := c.config.Node.Drivers[name] - if newVal.Equals(oldVal) { - continue - } - if oldVal == nil { + if oldVal == nil && newVal != nil { c.config.Node.Drivers[name] = newVal - } else { + } else if newVal != nil && newVal.Detected != oldVal.Detected { c.config.Node.Drivers[name].MergeFingerprintInfo(newVal) } } @@ -1027,8 +1024,11 @@ func (c *Client) updateNodeFromHealthCheck(response *cstructs.HealthCheckRespons // update the node with the latest driver health information for name, newVal := range response.Drivers { + if newVal == nil { + continue + } oldVal := c.config.Node.Drivers[name] - if newVal.Equals(oldVal) { + if newVal.HealthCheckEquals(oldVal) { continue } nodeHasChanged = true @@ -1597,6 +1597,7 @@ func (c *Client) watchNodeUpdates() { c.retryRegisterNode() hasChanged = false + timer.Reset(c.retryIntv(nodeUpdateRetryIntv)) case <-c.triggerNodeUpdate: if hasChanged { continue diff --git a/client/driver/docker.go b/client/driver/docker.go index bf24bfc486f..07d3afa9553 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -560,7 +560,14 @@ func (d *DockerDriver) HealthCheck(req *cstructs.HealthCheckRequest, resp *cstru UpdateTime: time.Now(), } - _, _, err := d.dockerClients() + client, _, err := d.dockerClients() + if err != nil { + d.logger.Printf("[WARN] driver.docker: docker driver is available but is unresponsive to `docker ps`") + resp.AddDriverInfo("docker", unhealthy) + return err + } + + _, err = client.ListContainers(docker.ListContainersOptions{All: false}) if err != nil { d.logger.Printf("[WARN] driver.docker: docker driver is available but is unresponsive to `docker ps`") resp.AddDriverInfo("docker", unhealthy) @@ -582,7 +589,7 @@ func (d *DockerDriver) HealthCheck(req *cstructs.HealthCheckRequest, resp *cstru // interval at which to do them. func (d *DockerDriver) GetHealthCheckInterval(req *cstructs.HealthCheckIntervalRequest, resp *cstructs.HealthCheckIntervalResponse) error { resp.Eligible = true - resp.Period = 1 * time.Minute + resp.Period = 3 * time.Second return nil } diff --git a/client/fingerprint_manager.go b/client/fingerprint_manager.go index 94791490f17..1d3229b5e79 100644 --- a/client/fingerprint_manager.go +++ b/client/fingerprint_manager.go @@ -201,9 +201,7 @@ func (fm *FingerprintManager) fingerprint(name string, f fingerprint.Fingerprint func (fm *FingerprintManager) healthCheck(name string, hc fingerprint.HealthCheck) error { request := &cstructs.HealthCheckRequest{} var response cstructs.HealthCheckResponse - if err := hc.HealthCheck(request, &response); err != nil { - return err - } + err := hc.HealthCheck(request, &response) if node := fm.updateHealthCheck(&response); node != nil { fm.nodeLock.Lock() @@ -211,7 +209,7 @@ func (fm *FingerprintManager) healthCheck(name string, hc fingerprint.HealthChec fm.nodeLock.Unlock() } - return nil + return err } // setupFingerprints is used to fingerprint the node to see if these attributes are diff --git a/nomad/structs/node.go b/nomad/structs/node.go new file mode 100644 index 00000000000..80daed172f2 --- /dev/null +++ b/nomad/structs/node.go @@ -0,0 +1,46 @@ +package structs + +import ( + "strings" + "time" +) + +// DriverInfo is the current state of a single driver. This is updated +// regularly as driver health changes on the node. +type DriverInfo struct { + Attributes map[string]string + Detected bool + Healthy bool + HealthDescription string + UpdateTime time.Time +} + +func (di *DriverInfo) MergeHealthCheck(other *DriverInfo) { + di.Healthy = other.Healthy + di.HealthDescription = other.HealthDescription + di.UpdateTime = other.UpdateTime +} + +func (di *DriverInfo) MergeFingerprintInfo(other *DriverInfo) { + di.Detected = other.Detected + di.Attributes = other.Attributes +} + +// DriverInfo determines if two driver info objects are equal..As this is used +// in the process of health checking, we only check the fields that are +// computed by the health checker. In the future, this will be merged. +func (di *DriverInfo) HealthCheckEquals(other *DriverInfo) bool { + if di == nil && other != nil || di != nil && other == nil { + return false + } + + if di.Healthy != other.Healthy { + return false + } + + if strings.Compare(di.HealthDescription, other.HealthDescription) != 0 { + return false + } + + return true +} diff --git a/nomad/structs/node_test.go b/nomad/structs/node_test.go new file mode 100644 index 00000000000..9a5ff7ae2a0 --- /dev/null +++ b/nomad/structs/node_test.go @@ -0,0 +1,60 @@ +package structs + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestDriverInfoEquals(t *testing.T) { + require := require.New(t) + var driverInfoTest = []struct { + input []*DriverInfo + expected bool + errorMsg string + }{ + { + []*DriverInfo{ + &DriverInfo{ + Healthy: true, + }, + &DriverInfo{ + Healthy: false, + }, + }, + false, + "Different healthy values should not be equal.", + }, + { + []*DriverInfo{ + &DriverInfo{ + HealthDescription: "not running", + }, + &DriverInfo{ + HealthDescription: "running", + }, + }, + false, + "Different health description values should not be equal.", + }, + { + []*DriverInfo{ + &DriverInfo{ + Healthy: true, + HealthDescription: "running", + }, + &DriverInfo{ + Healthy: true, + HealthDescription: "running", + }, + }, + true, + "Different health description values should not be equal.", + }, + } + for _, testCase := range driverInfoTest { + first := testCase.input[0] + second := testCase.input[1] + require.Equal(testCase.expected, first.HealthCheckEquals(second), testCase.errorMsg) + } +} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a24ea8cf699..fda297ff2af 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1079,50 +1079,6 @@ func ValidNodeStatus(status string) bool { } } -// DriverInfo is the current state of a single driver. This is updated -// regularly as driver health changes on the node. -type DriverInfo struct { - Attributes map[string]string - Detected bool - Healthy bool - HealthDescription string - UpdateTime time.Time -} - -func (di *DriverInfo) MergeHealthCheck(other *DriverInfo) { - di.Healthy = other.Healthy - di.HealthDescription = other.HealthDescription - di.UpdateTime = other.UpdateTime -} - -func (di *DriverInfo) MergeFingerprintInfo(other *DriverInfo) { - di.Detected = other.Detected - di.Attributes = other.Attributes -} - -func (di *DriverInfo) Equals(other *DriverInfo) bool { - if di == nil && other == nil { - return true - } - - if di == nil && other != nil || di != nil && other == nil { - return false - } - if !di.Detected == other.Detected { - return false - } - - if !di.Healthy == other.Healthy { - return false - } - - if strings.Compare(di.HealthDescription, other.HealthDescription) != 0 { - return false - } - - return true -} - // Node is a representation of a schedulable client node type Node struct { // ID is a unique identifier for the node. It can be constructed diff --git a/scheduler/feasible.go b/scheduler/feasible.go index 76d35138bee..359591c8048 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -135,13 +135,14 @@ func (c *DriverChecker) hasDrivers(option *structs.Node) bool { // be on a later version than a Nomad client, we need to check for // compatibility here to verify the client supports this. if option.Drivers != nil { - driverInfo := option.Drivers[driverStr] + driverInfo := option.Drivers[driver] if driverInfo == nil { c.ctx.Logger(). Printf("[WARN] scheduler.DriverChecker: node %v has no driver info set for %v", - option.ID, driverStr) + option.ID, driver) return false } + return driverInfo.Detected && driverInfo.Healthy } else { value, ok := option.Attributes[driverStr] diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go index e5de59efc5b..54830726300 100644 --- a/scheduler/feasible_test.go +++ b/scheduler/feasible_test.go @@ -134,21 +134,21 @@ func TestDriverChecker_HealthChecks(t *testing.T) { mock.Node(), } nodes[0].Attributes["driver.foo"] = "1" - nodes[0].Drivers["driver.foo"] = &structs.DriverInfo{ + nodes[0].Drivers["foo"] = &structs.DriverInfo{ Detected: true, Healthy: true, HealthDescription: "running", UpdateTime: time.Now(), } nodes[1].Attributes["driver.bar"] = "1" - nodes[1].Drivers["driver.bar"] = &structs.DriverInfo{ + nodes[1].Drivers["bar"] = &structs.DriverInfo{ Detected: true, Healthy: false, HealthDescription: "not running", UpdateTime: time.Now(), } nodes[2].Attributes["driver.baz"] = "0" - nodes[2].Drivers["driver.baz"] = &structs.DriverInfo{ + nodes[2].Drivers["baz"] = &structs.DriverInfo{ Detected: false, Healthy: false, HealthDescription: "not running",