diff --git a/api/nodes.go b/api/nodes.go index 194affde458..c8fd045047b 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -3,6 +3,7 @@ package api import ( "sort" "strconv" + "time" ) // Nodes is used to query node-related API endpoints @@ -94,6 +95,15 @@ func (n *Nodes) GC(nodeID string, q *QueryOptions) error { return err } +// DriverInfo is used to deserialize a DriverInfo entry +type DriverInfo struct { + Attributes map[string]string + Detected bool + Healthy bool + HealthDescription string + UpdateTime time.Time +} + // Node is used to deserialize a node entry. type Node struct { ID string @@ -111,6 +121,7 @@ type Node struct { Status string StatusDescription string StatusUpdatedAt int64 + Drivers map[string]*DriverInfo CreateIndex uint64 ModifyIndex uint64 } diff --git a/client/client.go b/client/client.go index 785936db98e..1ae49675940 100644 --- a/client/client.go +++ b/client/client.go @@ -960,8 +960,12 @@ func (c *Client) updateNodeFromHealthCheck(response *cstructs.HealthCheckRespons defer c.configLock.Unlock() // update the node with the latest driver health information - for name, val := range response.Drivers { - c.config.Node.Drivers[name] = val + for name, new_val := range response.Drivers { + old_val := c.config.Node.Drivers[name] + if new_val.Equals(old_val) { + continue + } + c.config.Node.Drivers[name] = new_val } return c.config.Node diff --git a/client/driver/docker.go b/client/driver/docker.go index f1b0c4f0285..343a13fc028 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -551,20 +551,20 @@ func (d *DockerDriver) Fingerprint(req *cstructs.FingerprintRequest, resp *cstru return nil } -func (d *DockerDriver) Check(req *cstructs.HealthCheckRequest, resp *cstructs.HealthCheckResponse) error { +func (d *DockerDriver) HealthCheck(req *cstructs.HealthCheckRequest, resp *cstructs.HealthCheckResponse) error { unhealthy := &structs.DriverInfo{ HealthDescription: "Docker driver is available but unresponsive", UpdateTime: time.Now(), } - _, err := client.ListContainers(docker.ListContainersOptions{All: false}) + _, _, err := d.dockerClients() if err != nil { d.logger.Printf("[WARN] driver.docker: docker driver is available but is unresponsive to `docker ps`") resp.AddDriverInfo("driver.docker", unhealthy) return err } - d.logger.Printf("[DEBUG] driver.docker: docker driver is available and is responsive to `docker ps`") + d.logger.Printf("[TRACE] driver.docker: docker driver is available and is responsive to `docker ps`") healthy := &structs.DriverInfo{ Healthy: true, HealthDescription: "Docker driver is available and responsive", @@ -574,8 +574,10 @@ func (d *DockerDriver) Check(req *cstructs.HealthCheckRequest, resp *cstructs.He return nil } -func (d *DockerDriver) CheckHealthPeriodic() (bool, time.Duration) { - return true, 1 * time.Minute +func (d *DockerDriver) GetHealthCheckInterval(req *cstructs.HealthCheckIntervalRequest, resp *cstructs.HealthCheckIntervalResponse) error { + resp.Eligible = true + resp.Period = 1 * time.Minute + return nil } // Validate is used to validate the driver configuration diff --git a/client/driver/docker_test.go b/client/driver/docker_test.go index 80be88e4a40..12b4ad2a9e2 100644 --- a/client/driver/docker_test.go +++ b/client/driver/docker_test.go @@ -286,7 +286,7 @@ func TestDockerDriver_Check_DockerHealthStatus(t *testing.T) { dc, ok := dd.(fingerprint.HealthCheck) require.True(ok) - err = dc.Check(request, &response) + err = dc.HealthCheck(request, &response) require.Nil(err) driverInfo := response.Drivers["driver.docker"] diff --git a/client/driver/mock_driver.go b/client/driver/mock_driver.go index 03811ef3dd4..5c66b122332 100644 --- a/client/driver/mock_driver.go +++ b/client/driver/mock_driver.go @@ -235,9 +235,9 @@ func (m *MockDriver) Fingerprint(req *cstructs.FingerprintRequest, resp *cstruct return nil } -// Check implements the interface for HealthCheck, and indicates the current +// HealthCheck implements the interface for HealthCheck, and indicates the current // health status of the mock driver. -func (m *MockDriver) Check(req *cstructs.HealthCheckRequest, resp *cstructs.HealthCheckResponse) error { +func (m *MockDriver) HealthCheck(req *cstructs.HealthCheckRequest, resp *cstructs.HealthCheckResponse) error { if !m.shutdownFingerprintTime.IsZero() && time.Now().After(m.shutdownFingerprintTime) { notHealthy := &structs.DriverInfo{ Healthy: false, @@ -256,12 +256,14 @@ func (m *MockDriver) Check(req *cstructs.HealthCheckRequest, resp *cstructs.Heal return nil } -// CheckHealthPeriodic implements the interface for HealthCheck and indicates +// GetHealthCheckInterval implements the interface for HealthCheck and indicates // that mock driver should be checked periodically. Returns a boolean // indicating if ti should be checked, and the duration at which to do this // check. -func (m *MockDriver) CheckHealthPeriodic() (bool, time.Duration) { - return true, 1 * time.Second +func (m *MockDriver) GetHealthCheckInterval(req *cstructs.HealthCheckIntervalRequest, resp *cstructs.HealthCheckIntervalResponse) error { + resp.Eligible = true + resp.Period = 1 * time.Second + return nil } // MockDriverHandle is a driver handler which supervises a mock task diff --git a/client/fingerprint/fingerprint.go b/client/fingerprint/fingerprint.go index 322788462a6..d6746a03d9d 100644 --- a/client/fingerprint/fingerprint.go +++ b/client/fingerprint/fingerprint.go @@ -91,13 +91,13 @@ type Factory func(*log.Logger) Fingerprint type HealthCheck interface { // Check is used to update properties of the node on the status of the health // check - Check(*cstructs.HealthCheckRequest, *cstructs.HealthCheckResponse) error + HealthCheck(*cstructs.HealthCheckRequest, *cstructs.HealthCheckResponse) error - // CheckHealthPeriodic is a mechanism for the health checker to indicate that + // GetHealthCheckInterval is a mechanism for the health checker to indicate that // it should be run periodically. The return value is a boolean indicating // whether it should be done periodically, and the time interval at which // this check should happen. - CheckHealthPeriodic() (bool, time.Duration) + GetHealthCheckInterval(*cstructs.HealthCheckIntervalRequest, *cstructs.HealthCheckIntervalResponse) error } // Fingerprint is used for doing "fingerprinting" of the diff --git a/client/fingerprint_manager.go b/client/fingerprint_manager.go index fcec58050ae..cfd2ca8385e 100644 --- a/client/fingerprint_manager.go +++ b/client/fingerprint_manager.go @@ -20,9 +20,12 @@ type FingerprintManager struct { nodeLock sync.Mutex shutdownCh chan struct{} - // updateNode is a callback to the client to update the state of its + // updateNodeAttributes is a callback to the client to update the state of its // associated node - updateNode func(*cstructs.FingerprintResponse) *structs.Node + updateNodeAttributes func(*cstructs.FingerprintResponse) *structs.Node + + // UpdateHealthCheck is a callback to the client to update the state of the + // node for resources that require a health check updateHealthCheck func(*cstructs.HealthCheckResponse) *structs.Node logger *log.Logger } @@ -32,16 +35,16 @@ type FingerprintManager struct { func NewFingerprintManager(getConfig func() *config.Config, node *structs.Node, shutdownCh chan struct{}, - updateNode func(*cstructs.FingerprintResponse) *structs.Node, + updateNodeAttributes func(*cstructs.FingerprintResponse) *structs.Node, updateHealthCheck func(*cstructs.HealthCheckResponse) *structs.Node, logger *log.Logger) *FingerprintManager { return &FingerprintManager{ - getConfig: getConfig, - updateNode: updateNode, - updateHealthCheck: updateHealthCheck, - node: node, - shutdownCh: shutdownCh, - logger: logger, + getConfig: getConfig, + updateNodeAttributes: updateNodeAttributes, + updateHealthCheck: updateHealthCheck, + node: node, + shutdownCh: shutdownCh, + logger: logger, } } @@ -95,7 +98,7 @@ func (fm *FingerprintManager) setupDrivers(drivers []string) error { return err } - detected, err := fm.fingerprint(name, d) + detected, err := fm.fingerprintDriver(name, d) if err != nil { fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting for %v failed: %+v", name, err) return err @@ -112,8 +115,11 @@ func (fm *FingerprintManager) setupDrivers(drivers []string) error { } if hc, ok := d.(fingerprint.HealthCheck); ok { - if checkPeriodic, interval := hc.CheckHealthPeriodic(); checkPeriodic { - go fm.runHealthCheck(hc, interval, name) + req := &cstructs.HealthCheckIntervalRequest{} + resp := &cstructs.HealthCheckIntervalResponse{} + hc.GetHealthCheckInterval(req, resp) + if resp.Eligible { + go fm.runHealthCheck(hc, resp.Period, name) } } } @@ -122,6 +128,54 @@ func (fm *FingerprintManager) setupDrivers(drivers []string) error { return nil } +// fingerprintDriver is a temporary solution to move towards DriverInfo and +// away from annotating a node's attributes to demonstrate support for a +// particular driver. Takes the FingerprintResponse and converts it to the +// proper DriverInfo update and then sets the prefix attributes as well +func (fm *FingerprintManager) fingerprintDriver(name string, f fingerprint.Fingerprint) (bool, error) { + request := &cstructs.FingerprintRequest{Config: fm.getConfig(), Node: fm.node} + var response cstructs.FingerprintResponse + if err := f.Fingerprint(request, &response); err != nil { + return false, err + } + + fm.nodeLock.Lock() + if node := fm.updateNodeAttributes(&response); node != nil { + fm.node = node + } + fm.nodeLock.Unlock() + + if hc, ok := f.(fingerprint.HealthCheck); ok { + fm.healthCheck(name, hc) + } else { + // This is a temporary measure, as eventually all drivers will need to + // support this. Doing this so that we can enable this iteratively and also + // in a backwards compatible way, where node attributes for drivers will + // eventually be phased out. + + di := &structs.DriverInfo{ + Attributes: response.Attributes, + Detected: response.Detected, + Healthy: response.Detected, + UpdateTime: time.Now(), + } + + resp := &cstructs.HealthCheckResponse{ + Drivers: map[string]*structs.DriverInfo{ + name: di, + }, + } + + fm.nodeLock.Lock() + if node := fm.updateHealthCheck(resp); node != nil { + fm.node = node + } + fm.nodeLock.Unlock() + } + + return response.Detected, nil +} + // fingerprint does an initial fingerprint of the client. If the fingerprinter // is meant to be run continuously, a process is launched to perform this // fingerprint on an ongoing basis in the background. @@ -133,7 +187,7 @@ func (fm *FingerprintManager) fingerprint(name string, f fingerprint.Fingerprint } fm.nodeLock.Lock() - if node := fm.updateNode(&response); node != nil { + if node := fm.updateNodeAttributes(&response); node != nil { fm.node = node } fm.nodeLock.Unlock() @@ -145,15 +199,15 @@ func (fm *FingerprintManager) fingerprint(name string, f fingerprint.Fingerprint func (fm *FingerprintManager) healthCheck(name string, hc fingerprint.HealthCheck) error { request := &cstructs.HealthCheckRequest{} var response cstructs.HealthCheckResponse - if err := hc.Check(request, &response); err != nil { + if err := hc.HealthCheck(request, &response); err != nil { return err } - fm.nodeLock.Lock() if node := fm.updateHealthCheck(&response); node != nil { + fm.nodeLock.Lock() fm.node = node + fm.nodeLock.Unlock() } - fm.nodeLock.Unlock() return nil } @@ -187,8 +241,12 @@ func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error { } if hc, ok := f.(fingerprint.HealthCheck); ok { - if checkPeriodic, interval := hc.CheckHealthPeriodic(); checkPeriodic { - go fm.runHealthCheck(hc, interval, name) + req := &cstructs.HealthCheckIntervalRequest{} + var resp cstructs.HealthCheckIntervalResponse + if err := hc.GetHealthCheckInterval(req, &resp); err != nil { + if resp.Eligible { + go fm.runHealthCheck(hc, resp.Period, name) + } } } } diff --git a/client/fingerprint_manager_test.go b/client/fingerprint_manager_test.go index 268fb21099e..58bf5aba02b 100644 --- a/client/fingerprint_manager_test.go +++ b/client/fingerprint_manager_test.go @@ -155,6 +155,96 @@ func TestFingerprintManager_Fingerprint_Periodic(t *testing.T) { }) } +// This is a temporary measure to check that a driver has both attributes on a +// node set as well as DriverInfo. +func TestFingerprintManager_HealthCheck_Driver(t *testing.T) { + t.Parallel() + require := require.New(t) + + node := &structs.Node{ + Attributes: make(map[string]string, 0), + Drivers: make(map[string]*structs.DriverInfo, 0), + } + updateNode := func(r *cstructs.FingerprintResponse) *structs.Node { + if r.Attributes != nil { + for k, v := range r.Attributes { + node.Attributes[k] = v + } + } + return node + } + updateHealthCheck := func(resp *cstructs.HealthCheckResponse) *structs.Node { + if resp.Drivers != nil { + for k, v := range resp.Drivers { + node.Drivers[k] = v + } + } + return node + } + conf := config.DefaultConfig() + conf.Options = map[string]string{ + "test.shutdown_periodic_after": "true", + "test.shutdown_periodic_duration": "2", + } + getConfig := func() *config.Config { + return conf + } + + shutdownCh := make(chan struct{}) + defer (func() { + close(shutdownCh) + })() + + fm := NewFingerprintManager( + getConfig, + node, + shutdownCh, + updateNode, + updateHealthCheck, + testLogger(), + ) + + err := fm.Run() + require.Nil(err) + + // Ensure the mock driver is registered and healthy on the client + testutil.WaitForResult(func() (bool, error) { + mockDriverAttribute := node.Attributes["driver.mock_driver"] + if mockDriverAttribute == "" { + return false, fmt.Errorf("mock driver info should be set on the client attributes") + } + mockDriverInfo := node.Drivers["driver.mock_driver"] + if mockDriverInfo == nil { + return false, fmt.Errorf("mock driver info should be set on the client") + } + if !mockDriverInfo.Healthy { + return false, fmt.Errorf("mock driver info should be healthy") + } + return true, nil + }, func(err error) { + + // Ensure that a default driver without health checks enabled is registered and healthy on the client + testutil.WaitForResult(func() (bool, error) { + rawExecAttribute := node.Attributes["driver.raw_exec"] + if rawExecAttribute == "" { + return false, fmt.Errorf("raw exec info should be set on the client attributes") + } + rawExecInfo := node.Drivers["driver.raw_exec"] + if rawExecInfo == nil { + return false, fmt.Errorf("raw exec info should be set on the client") + } + if !rawExecInfo.Healthy { + return false, fmt.Errorf("raw exec info should be healthy") + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + t.Fatalf("err: %v", err) + }) + +} + func TestFingerprintManager_HealthCheck_Periodic(t *testing.T) { t.Parallel() require := require.New(t) diff --git a/client/structs/structs.go b/client/structs/structs.go index 758074fd8d1..c36ae8501f4 100644 --- a/client/structs/structs.go +++ b/client/structs/structs.go @@ -4,6 +4,7 @@ import ( "crypto/md5" "io" "strconv" + "time" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/nomad/structs" @@ -250,14 +251,23 @@ func (f *FingerprintResponse) RemoveLink(name string) { f.Links[name] = "" } +// HealthCheckRequest is the request type for a type that fulfils the Health +// Check interface type HealthCheckRequest struct{} +// HealthCheckResponse is the response type for a type that fulfills the Health +// Check interface type HealthCheckResponse struct { - // Drivers is a map of driver names to current driver information Drivers map[string]*structs.DriverInfo } +type HealthCheckIntervalRequest struct{} +type HealthCheckIntervalResponse struct { + Eligible bool + Period time.Duration +} + // AddDriverInfo adds information about a driver to the fingerprint response. // If the Drivers field has not yet been initialized, it does so here. func (h *HealthCheckResponse) AddDriverInfo(name string, driverInfo *structs.DriverInfo) { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index abc8c47e7e1..71a509564cd 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1065,12 +1065,36 @@ func ValidNodeStatus(status string) bool { // DriverInfo is the current state of a single driver. This is updated // regularly as driver health changes on the node. type DriverInfo struct { - Enabled bool + Attributes map[string]string + Detected bool Healthy bool HealthDescription string UpdateTime time.Time } +func (di *DriverInfo) Equals(other *DriverInfo) bool { + if di == nil && other == nil { + return true + } + + if di == nil && other != nil || di != nil && other == nil { + return false + } + if !di.Detected == other.Detected { + return false + } + + if !di.Healthy == other.Healthy { + return false + } + + if strings.Compare(di.HealthDescription, other.HealthDescription) != 0 { + return false + } + + return true +} + // Node is a representation of a schedulable client node type Node struct { // ID is a unique identifier for the node. It can be constructed @@ -1145,12 +1169,12 @@ type Node struct { // updated StatusUpdatedAt int64 + // Drivers is a map of driver names to current driver information + Drivers map[string]*DriverInfo + // Raft Indexes CreateIndex uint64 ModifyIndex uint64 - - // Drivers is a map of driver names to current driver information - Drivers map[string]*DriverInfo } // Ready returns if the node is ready for running allocations