diff --git a/api/nodes.go b/api/nodes.go index db7f25ffb39..549eeea6663 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -4,6 +4,7 @@ import ( "fmt" "sort" "strconv" + "time" ) // Nodes is used to query node-related API endpoints @@ -96,6 +97,15 @@ func (n *Nodes) GcAlloc(allocID string, q *QueryOptions) error { return err } +// DriverInfo is used to deserialize a DriverInfo entry +type DriverInfo struct { + Attributes map[string]string + Detected bool + Healthy bool + HealthDescription string + UpdateTime time.Time +} + // Node is used to deserialize a node entry. type Node struct { ID string @@ -114,6 +124,7 @@ type Node struct { StatusDescription string StatusUpdatedAt int64 Events []*NodeEvent + Drivers map[string]*DriverInfo CreateIndex uint64 ModifyIndex uint64 } diff --git a/client/client.go b/client/client.go index 5c221154667..f1022f44428 100644 --- a/client/client.go +++ b/client/client.go @@ -259,7 +259,8 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic } fingerprintManager := NewFingerprintManager(c.GetConfig, c.config.Node, - c.shutdownCh, c.updateNodeFromFingerprint, c.logger) + c.shutdownCh, c.updateNodeFromFingerprint, c.updateNodeFromDriver, + c.logger) // Fingerprint the node and scan for drivers if err := fingerprintManager.Run(); err != nil { @@ -894,6 +895,9 @@ func (c *Client) setupNode() error { if node.Links == nil { node.Links = make(map[string]string) } + if node.Drivers == nil { + node.Drivers = make(map[string]*structs.DriverInfo) + } if node.Meta == nil { node.Meta = make(map[string]string) } @@ -1006,6 +1010,81 @@ func (c *Client) updateNodeFromFingerprint(response *cstructs.FingerprintRespons if nodeHasChanged { c.updateNode() } + + return c.config.Node +} + +// updateNodeFromDriver receives either a fingerprint of the driver or its +// health and merges this into a single DriverInfo object +func (c *Client) updateNodeFromDriver(name string, fingerprint, health *structs.DriverInfo) *structs.Node { + c.configLock.Lock() + defer c.configLock.Unlock() + + var hasChanged bool + + if fingerprint != nil { + if c.config.Node.Drivers[name] == nil { + // If the driver info has not yet been set, do that here + hasChanged = true + c.config.Node.Drivers[name] = fingerprint + } else { + // The driver info has already been set, fix it up + if c.config.Node.Drivers[name].Detected != fingerprint.Detected { + hasChanged = true + c.config.Node.Drivers[name].Detected = fingerprint.Detected + } + + for attrName, newVal := range fingerprint.Attributes { + oldVal := c.config.Node.Drivers[name].Attributes[attrName] + if oldVal == newVal { + continue + } + + hasChanged = true + if newVal == "" { + delete(c.config.Node.Attributes, attrName) + } else { + c.config.Node.Attributes[attrName] = newVal + } + } + } + } + + if health != nil { + if c.config.Node.Drivers[name] == nil { + hasChanged = true + c.config.Node.Drivers[name] = health + } else { + oldVal := c.config.Node.Drivers[name] + if health.HealthCheckEquals(oldVal) { + // Make sure we accurately reflect the last time a health check has been + // performed for the driver. + oldVal.UpdateTime = health.UpdateTime + } else { + hasChanged = true + + // Only emit an event if the health status has changed, not if we are + // simply updating a node on startup + if health.Healthy != oldVal.Healthy && oldVal.HealthDescription != "" { + event := &structs.NodeEvent{ + Subsystem: "Driver", + Message: health.HealthDescription, + Timestamp: time.Now().Unix(), + } + c.triggerNodeEvent(event) + } + + // Update the node with the latest information + c.config.Node.Drivers[name].MergeHealthCheck(health) + } + } + } + + if hasChanged { + c.config.Node.Drivers[name].UpdateTime = time.Now() + c.updateNode() + } + return c.config.Node } @@ -1190,7 +1269,7 @@ func (c *Client) watchNodeEvents() { timer.Reset(c.retryIntv(nodeUpdateRetryIntv)) } else { // Reset the events since we successfully sent them. - batchEvents = nil + batchEvents = []*structs.NodeEvent{} } case <-c.shutdownCh: return diff --git a/client/client_test.go b/client/client_test.go index de5d35ee997..5e7967441b5 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -142,27 +142,63 @@ func TestClient_Fingerprint_Periodic(t *testing.T) { node := c1.config.Node mockDriverName := "driver.mock_driver" - // Ensure the mock driver is registered on the client - testutil.WaitForResult(func() (bool, error) { - mockDriverStatus := node.Attributes[mockDriverName] - if mockDriverStatus == "" { - return false, fmt.Errorf("mock driver attribute should be set on the client") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) + { + // Ensure the mock driver is registered on the client + testutil.WaitForResult(func() (bool, error) { + c1.configLock.Lock() + defer c1.configLock.Unlock() + mockDriverStatus := node.Attributes[mockDriverName] + mockDriverInfo := node.Drivers["mock_driver"] + if mockDriverStatus == "" { + return false, fmt.Errorf("mock driver attribute should be set on the client") + } - // Ensure that the client fingerprinter eventually removes this attribute - testutil.WaitForResult(func() (bool, error) { - mockDriverStatus := node.Attributes[mockDriverName] - if mockDriverStatus != "" { - return false, fmt.Errorf("mock driver attribute should not be set on the client") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) + // assert that the Driver information for the node is also set correctly + if mockDriverInfo == nil { + return false, fmt.Errorf("mock driver is nil when it should be set on node Drivers") + } + if !mockDriverInfo.Detected { + return false, fmt.Errorf("mock driver should be set as detected") + } + if !mockDriverInfo.Healthy { + return false, fmt.Errorf("mock driver should be set as healthy") + } + if mockDriverInfo.HealthDescription == "" { + return false, fmt.Errorf("mock driver description should not be empty") + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + } + + { + testutil.WaitForResult(func() (bool, error) { + c1.configLock.Lock() + defer c1.configLock.Unlock() + mockDriverStatus := node.Attributes[mockDriverName] + mockDriverInfo := node.Drivers["mock_driver"] + if mockDriverStatus != "" { + return false, fmt.Errorf("mock driver attribute should not be set on the client") + } + // assert that the Driver information for the node is also set correctly + if mockDriverInfo == nil { + return false, fmt.Errorf("mock driver is nil when it should be set on node Drivers") + } + if mockDriverInfo.Detected { + return false, fmt.Errorf("mock driver should be set as detected") + } + if mockDriverInfo.Healthy { + return false, fmt.Errorf("mock driver should be set as healthy") + } + if mockDriverInfo.HealthDescription == "" { + return false, fmt.Errorf("mock driver description should not be empty") + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + } } // TestClient_MixedTLS asserts that when a server is running with TLS enabled diff --git a/client/driver/docker.go b/client/driver/docker.go index 4f687133329..e5ffb1afa80 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -492,7 +492,6 @@ func (d *DockerDriver) Fingerprint(req *cstructs.FingerprintRequest, resp *cstru d.logger.Printf("[INFO] driver.docker: failed to initialize client: %s", err) } d.fingerprintSuccess = helper.BoolToPtr(false) - resp.RemoveAttribute(dockerDriverAttr) return nil } @@ -552,6 +551,46 @@ func (d *DockerDriver) Fingerprint(req *cstructs.FingerprintRequest, resp *cstru return nil } +// HealthCheck implements the interface for the HealthCheck interface. This +// performs a health check on the docker driver, asserting whether the docker +// driver is responsive to a `docker ps` command. +func (d *DockerDriver) HealthCheck(req *cstructs.HealthCheckRequest, resp *cstructs.HealthCheckResponse) error { + dinfo := &structs.DriverInfo{ + UpdateTime: time.Now(), + } + + client, _, err := d.dockerClients() + if err != nil { + d.logger.Printf("[WARN] driver.docker: failed to retrieve Docker client in the process of a docker health check: %v", err) + dinfo.HealthDescription = fmt.Sprintf("Failed retrieving Docker client: %v", err) + resp.AddDriverInfo("docker", dinfo) + return nil + } + + _, err = client.ListContainers(docker.ListContainersOptions{All: false}) + if err != nil { + d.logger.Printf("[WARN] driver.docker: failed to list Docker containers in the process of a Docker health check: %v", err) + dinfo.HealthDescription = fmt.Sprintf("Failed to list Docker containers: %v", err) + resp.AddDriverInfo("docker", dinfo) + return nil + } + + d.logger.Printf("[TRACE] driver.docker: docker driver is available and is responsive to `docker ps`") + dinfo.Healthy = true + dinfo.HealthDescription = "Docker driver is available and responsive" + resp.AddDriverInfo("docker", dinfo) + return nil +} + +// GetHealthChecks implements the interface for the HealthCheck interface. This +// sets whether the driver is eligible for periodic health checks and the +// interval at which to do them. +func (d *DockerDriver) GetHealthCheckInterval(req *cstructs.HealthCheckIntervalRequest, resp *cstructs.HealthCheckIntervalResponse) error { + resp.Eligible = true + resp.Period = 1 * time.Minute + return nil +} + // Validate is used to validate the driver configuration func (d *DockerDriver) Validate(config map[string]interface{}) error { fd := &fields.FieldData{ diff --git a/client/driver/docker_test.go b/client/driver/docker_test.go index 58e9bb813a0..316d9e1edf2 100644 --- a/client/driver/docker_test.go +++ b/client/driver/docker_test.go @@ -21,6 +21,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver/env" + "github.com/hashicorp/nomad/client/fingerprint" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/helper/uuid" @@ -164,6 +165,7 @@ func TestDockerDriver_Fingerprint(t *testing.T) { if !tu.IsTravis() { t.Parallel() } + ctx := testDockerDriverContexts(t, &structs.Task{Name: "foo", Driver: "docker", Resources: basicResources}) //ctx.DriverCtx.config.Options = map[string]string{"docker.cleanup.image": "false"} defer ctx.AllocDir.Destroy() @@ -227,6 +229,7 @@ func TestDockerDriver_Fingerprint_Bridge(t *testing.T) { request := &cstructs.FingerprintRequest{Config: conf, Node: conf.Node} var response cstructs.FingerprintResponse + err = dd.Fingerprint(request, &response) if err != nil { t.Fatalf("error fingerprinting docker: %v", err) @@ -251,6 +254,44 @@ func TestDockerDriver_Fingerprint_Bridge(t *testing.T) { t.Logf("docker bridge ip: %q", attributes["driver.docker.bridge_ip"]) } +func TestDockerDriver_Check_DockerHealthStatus(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + if !testutil.DockerIsConnected(t) { + t.Skip("requires Docker") + } + if runtime.GOOS != "linux" { + t.Skip("expect only on linux") + } + + require := require.New(t) + + expectedAddr, err := sockaddr.GetInterfaceIP("docker0") + if err != nil { + t.Fatalf("unable to get ip for docker0: %v", err) + } + if expectedAddr == "" { + t.Fatalf("unable to get ip for docker bridge") + } + + conf := testConfig(t) + conf.Node = mock.Node() + dd := NewDockerDriver(NewDriverContext("", "", conf, conf.Node, testLogger(), nil)) + + request := &cstructs.HealthCheckRequest{} + var response cstructs.HealthCheckResponse + + dc, ok := dd.(fingerprint.HealthCheck) + require.True(ok) + err = dc.HealthCheck(request, &response) + require.Nil(err) + + driverInfo := response.Drivers["docker"] + require.NotNil(driverInfo) + require.True(driverInfo.Healthy) +} + func TestDockerDriver_StartOpen_Wait(t *testing.T) { if !tu.IsTravis() { t.Parallel() diff --git a/client/driver/mock_driver.go b/client/driver/mock_driver.go index e8a90f505d5..09a86f72ded 100644 --- a/client/driver/mock_driver.go +++ b/client/driver/mock_driver.go @@ -30,6 +30,8 @@ const ( // to "stop" a previously functioning driver after the specified duration // (specified in seconds) for testing of periodic drivers and fingerprinters. ShutdownPeriodicDuration = "test.shutdown_periodic_duration" + + mockDriverName = "driver.mock_driver" ) // MockDriverConfig is the driver configuration for the MockDriver @@ -234,9 +236,9 @@ func (m *MockDriver) Fingerprint(req *cstructs.FingerprintRequest, resp *cstruct // current time is after the time which the node should shut down, simulate // driver failure case !m.shutdownFingerprintTime.IsZero() && time.Now().After(m.shutdownFingerprintTime): - resp.RemoveAttribute("driver.mock_driver") + resp.RemoveAttribute(mockDriverName) default: - resp.AddAttribute("driver.mock_driver", "1") + resp.AddAttribute(mockDriverName, "1") resp.Detected = true } return nil @@ -247,6 +249,39 @@ func (m *MockDriver) Periodic() (bool, time.Duration) { return true, 500 * time.Millisecond } +// HealthCheck implements the interface for HealthCheck, and indicates the current +// health status of the mock driver. +func (m *MockDriver) HealthCheck(req *cstructs.HealthCheckRequest, resp *cstructs.HealthCheckResponse) error { + switch { + case !m.shutdownFingerprintTime.IsZero() && time.Now().After(m.shutdownFingerprintTime): + notHealthy := &structs.DriverInfo{ + Healthy: false, + HealthDescription: "not running", + UpdateTime: time.Now(), + } + resp.AddDriverInfo("mock_driver", notHealthy) + return nil + default: + healthy := &structs.DriverInfo{ + Healthy: true, + HealthDescription: "running", + UpdateTime: time.Now(), + } + resp.AddDriverInfo("mock_driver", healthy) + return nil + } +} + +// GetHealthCheckInterval implements the interface for HealthCheck and indicates +// that mock driver should be checked periodically. Returns a boolean +// indicating if it should be checked, and the duration at which to do this +// check. +func (m *MockDriver) GetHealthCheckInterval(req *cstructs.HealthCheckIntervalRequest, resp *cstructs.HealthCheckIntervalResponse) error { + resp.Eligible = true + resp.Period = 1 * time.Second + return nil +} + // MockDriverHandle is a driver handler which supervises a mock task type mockDriverHandle struct { ctx *ExecContext diff --git a/client/fingerprint/fingerprint.go b/client/fingerprint/fingerprint.go index 8a3477f5174..d6746a03d9d 100644 --- a/client/fingerprint/fingerprint.go +++ b/client/fingerprint/fingerprint.go @@ -85,6 +85,21 @@ func NewFingerprint(name string, logger *log.Logger) (Fingerprint, error) { // Factory is used to instantiate a new Fingerprint type Factory func(*log.Logger) Fingerprint +// HealthCheck is used for doing periodic health checks. On a given time +// interfal, a health check will be called by the fingerprint manager of the +// node. +type HealthCheck interface { + // Check is used to update properties of the node on the status of the health + // check + HealthCheck(*cstructs.HealthCheckRequest, *cstructs.HealthCheckResponse) error + + // GetHealthCheckInterval is a mechanism for the health checker to indicate that + // it should be run periodically. The return value is a boolean indicating + // whether it should be done periodically, and the time interval at which + // this check should happen. + GetHealthCheckInterval(*cstructs.HealthCheckIntervalRequest, *cstructs.HealthCheckIntervalResponse) error +} + // Fingerprint is used for doing "fingerprinting" of the // host to automatically determine attributes, resources, // and metadata about it. Each of these is a heuristic, and diff --git a/client/fingerprint_manager.go b/client/fingerprint_manager.go index 74b75bafde4..f1452aeb554 100644 --- a/client/fingerprint_manager.go +++ b/client/fingerprint_manager.go @@ -1,7 +1,9 @@ package client import ( + "fmt" "log" + "strings" "sync" "time" @@ -20,10 +22,14 @@ type FingerprintManager struct { nodeLock sync.Mutex shutdownCh chan struct{} - // updateNode is a callback to the client to update the state of its + // updateNodeAttributes is a callback to the client to update the state of its // associated node - updateNode func(*cstructs.FingerprintResponse) *structs.Node - logger *log.Logger + updateNodeAttributes func(*cstructs.FingerprintResponse) *structs.Node + + // updateNodeFromDriver is a callback to the client to update the state of a + // specific driver for the node + updateNodeFromDriver func(string, *structs.DriverInfo, *structs.DriverInfo) *structs.Node + logger *log.Logger } // NewFingerprintManager is a constructor that creates and returns an instance @@ -31,34 +37,123 @@ type FingerprintManager struct { func NewFingerprintManager(getConfig func() *config.Config, node *structs.Node, shutdownCh chan struct{}, - updateNode func(*cstructs.FingerprintResponse) *structs.Node, + updateNodeAttributes func(*cstructs.FingerprintResponse) *structs.Node, + updateNodeFromDriver func(string, *structs.DriverInfo, *structs.DriverInfo) *structs.Node, logger *log.Logger) *FingerprintManager { return &FingerprintManager{ - getConfig: getConfig, - updateNode: updateNode, - node: node, - shutdownCh: shutdownCh, - logger: logger, + getConfig: getConfig, + updateNodeAttributes: updateNodeAttributes, + updateNodeFromDriver: updateNodeFromDriver, + node: node, + shutdownCh: shutdownCh, + logger: logger, } } -// run runs each fingerprinter individually on an ongoing basis -func (fm *FingerprintManager) run(f fingerprint.Fingerprint, period time.Duration, name string) { - fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting %s every %v", name, period) +// Run starts the process of fingerprinting the node. It does an initial pass, +// identifying whitelisted and blacklisted fingerprints/drivers. Then, for +// those which require periotic checking, it starts a periodic process for +// each. +func (fp *FingerprintManager) Run() error { + // First, set up all fingerprints + cfg := fp.getConfig() + whitelistFingerprints := cfg.ReadStringListToMap("fingerprint.whitelist") + whitelistFingerprintsEnabled := len(whitelistFingerprints) > 0 + blacklistFingerprints := cfg.ReadStringListToMap("fingerprint.blacklist") - for { - select { - case <-time.After(period): - _, err := fm.fingerprint(name, f) - if err != nil { - fm.logger.Printf("[DEBUG] client.fingerprint_manager: periodic fingerprinting for %v failed: %+v", name, err) - continue - } + fp.logger.Printf("[DEBUG] client.fingerprint_manager: built-in fingerprints: %v", fingerprint.BuiltinFingerprints()) - case <-fm.shutdownCh: - return + var availableFingerprints []string + var skippedFingerprints []string + for _, name := range fingerprint.BuiltinFingerprints() { + // Skip modules that are not in the whitelist if it is enabled. + if _, ok := whitelistFingerprints[name]; whitelistFingerprintsEnabled && !ok { + skippedFingerprints = append(skippedFingerprints, name) + continue } + // Skip modules that are in the blacklist + if _, ok := blacklistFingerprints[name]; ok { + skippedFingerprints = append(skippedFingerprints, name) + continue + } + + availableFingerprints = append(availableFingerprints, name) + } + + if err := fp.setupFingerprinters(availableFingerprints); err != nil { + return err } + + if len(skippedFingerprints) != 0 { + fp.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprint modules skipped due to white/blacklist: %v", skippedFingerprints) + } + + // Next, set up drivers + // Build the white/blacklists of drivers. + whitelistDrivers := cfg.ReadStringListToMap("driver.whitelist") + whitelistDriversEnabled := len(whitelistDrivers) > 0 + blacklistDrivers := cfg.ReadStringListToMap("driver.blacklist") + + var availDrivers []string + var skippedDrivers []string + + for name := range driver.BuiltinDrivers { + // Skip fingerprinting drivers that are not in the whitelist if it is + // enabled. + if _, ok := whitelistDrivers[name]; whitelistDriversEnabled && !ok { + skippedDrivers = append(skippedDrivers, name) + continue + } + // Skip fingerprinting drivers that are in the blacklist + if _, ok := blacklistDrivers[name]; ok { + skippedDrivers = append(skippedDrivers, name) + continue + } + + availDrivers = append(availDrivers, name) + } + + if err := fp.setupDrivers(availDrivers); err != nil { + return err + } + + if len(skippedDrivers) > 0 { + fp.logger.Printf("[DEBUG] client.fingerprint_manager: drivers skipped due to white/blacklist: %v", skippedDrivers) + } + return nil +} + +// setupFingerprints is used to fingerprint the node to see if these attributes are +// supported +func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error { + var appliedFingerprints []string + + for _, name := range fingerprints { + f, err := fingerprint.NewFingerprint(name, fm.logger) + + if err != nil { + fm.logger.Printf("[ERR] client.fingerprint_manager: fingerprinting for %v failed: %+v", name, err) + return err + } + + detected, err := fm.fingerprint(name, f) + if err != nil { + return err + } + + // log the fingerprinters which have been applied + if detected { + appliedFingerprints = append(appliedFingerprints, name) + } + + p, period := f.Periodic() + if p { + go fm.runFingerprint(f, period, name) + } + } + + fm.logger.Printf("[DEBUG] client.fingerprint_manager: detected fingerprints %v", appliedFingerprints) + return nil } // setupDrivers is used to fingerprint the node to see if these drivers are @@ -73,20 +168,32 @@ func (fm *FingerprintManager) setupDrivers(drivers []string) error { return err } - detected, err := fm.fingerprint(name, d) + detected, err := fm.fingerprintDriver(name, d) if err != nil { - fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting for %v failed: %+v", name, err) + fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting driver %v failed: %+v", name, err) return err } - // log the fingerprinters which have been applied - if detected { - availDrivers = append(availDrivers, name) + // Set the initial health check status to be the driver detected status. + // Later, the periodic health checker will update this value for drivers + // where health checks are enabled. + healthInfo := &structs.DriverInfo{ + Healthy: detected, + UpdateTime: time.Now(), + } + if node := fm.updateNodeFromDriver(name, nil, healthInfo); node != nil { + fm.nodeLock.Lock() + fm.node = node + fm.nodeLock.Unlock() } - p, period := d.Periodic() - if p { - go fm.run(d, period, name) + // Start a periodic watcher to detect changes to a drivers health and + // attributes. + go fm.watchDriver(d, name) + + // Log the fingerprinters which have been applied + if detected { + availDrivers = append(availDrivers, name) } } @@ -94,14 +201,38 @@ func (fm *FingerprintManager) setupDrivers(drivers []string) error { return nil } +// runFingerprint runs each fingerprinter individually on an ongoing basis +func (fm *FingerprintManager) runFingerprint(f fingerprint.Fingerprint, period time.Duration, name string) { + fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting %s every %v", name, period) + + timer := time.NewTimer(period) + defer timer.Stop() + + for { + select { + case <-timer.C: + timer.Reset(period) + + _, err := fm.fingerprint(name, f) + if err != nil { + fm.logger.Printf("[DEBUG] client.fingerprint_manager: periodic fingerprinting for %v failed: %+v", name, err) + continue + } + + case <-fm.shutdownCh: + return + } + } +} + // fingerprint does an initial fingerprint of the client. If the fingerprinter // is meant to be run continuously, a process is launched to perform this // fingerprint on an ongoing basis in the background. func (fm *FingerprintManager) fingerprint(name string, f fingerprint.Fingerprint) (bool, error) { - request := &cstructs.FingerprintRequest{Config: fm.getConfig(), Node: fm.node} var response cstructs.FingerprintResponse fm.nodeLock.Lock() + request := &cstructs.FingerprintRequest{Config: fm.getConfig(), Node: fm.node} err := f.Fingerprint(request, &response) fm.nodeLock.Unlock() @@ -109,7 +240,7 @@ func (fm *FingerprintManager) fingerprint(name string, f fingerprint.Fingerprint return false, err } - if node := fm.updateNode(&response); node != nil { + if node := fm.updateNodeAttributes(&response); node != nil { fm.nodeLock.Lock() fm.node = node fm.nodeLock.Unlock() @@ -118,108 +249,145 @@ func (fm *FingerprintManager) fingerprint(name string, f fingerprint.Fingerprint return response.Detected, nil } -// setupFingerprints is used to fingerprint the node to see if these attributes are -// supported -func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error { - var appliedFingerprints []string - - for _, name := range fingerprints { - f, err := fingerprint.NewFingerprint(name, fm.logger) +// watchDrivers facilitates the different periods between fingerprint and +// health checking a driver +func (fm *FingerprintManager) watchDriver(d driver.Driver, name string) { + var fingerprintTicker, healthTicker <-chan time.Time - if err != nil { - fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting for %v failed: %+v", name, err) - return err - } + // Determine whether the fingerprinter is periodic and health checking + isPeriodic, fingerprintPeriod := d.Periodic() + hc, isHealthCheck := d.(fingerprint.HealthCheck) - detected, err := fm.fingerprint(name, f) - if err != nil { - return err - } + // Nothing to do since the state of this driver will never change + if !isPeriodic && !isHealthCheck { + return + } - // log the fingerprinters which have been applied - if detected { - appliedFingerprints = append(appliedFingerprints, name) + // Setup the required tickers + if isPeriodic { + ticker := time.NewTicker(fingerprintPeriod) + fingerprintTicker = ticker.C + defer ticker.Stop() + fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting driver %s every %v", name, fingerprintPeriod) + } + if isHealthCheck { + // Determine the interval at which to health check + req := &cstructs.HealthCheckIntervalRequest{} + var resp cstructs.HealthCheckIntervalResponse + + if err := hc.GetHealthCheckInterval(req, &resp); err != nil { + fm.logger.Printf("[ERR] client.fingerprint_manager: error getting health check interval for driver %s: %v", name, err) + } else if resp.Eligible { + ticker := time.NewTicker(resp.Period) + healthTicker = ticker.C + defer ticker.Stop() + fm.logger.Printf("[DEBUG] client.fingerprint_manager: health checking driver %s every %v", name, resp.Period) } + } - p, period := f.Periodic() - if p { - go fm.run(f, period, name) + for { + select { + case <-fm.shutdownCh: + return + case <-fingerprintTicker: + if _, err := fm.fingerprintDriver(name, d); err != nil { + fm.logger.Printf("[DEBUG] client.fingerprint_manager: periodic fingerprinting for driver %v failed: %+v", name, err) + } + case <-healthTicker: + // Determine if we should run the health check + fm.nodeLock.Lock() + driver, detected := fm.node.Drivers[name] + if detected && driver != nil { + detected = driver.Detected + } + fm.nodeLock.Unlock() + + if detected { + if err := fm.runDriverHealthCheck(name, hc); err != nil { + fm.logger.Printf("[DEBUG] client.fingerprint_manager: health checking for %v failed: %v", name, err) + } + } else { + // If the driver is undetected, change the health status to unhealthy + // only once. + healthInfo := &structs.DriverInfo{ + Healthy: false, + HealthDescription: fmt.Sprintf("Driver %s is not detected", name), + UpdateTime: time.Now(), + } + if node := fm.updateNodeFromDriver(name, nil, healthInfo); node != nil { + fm.nodeLock.Lock() + fm.node = node + fm.nodeLock.Unlock() + } + } } } - - fm.logger.Printf("[DEBUG] client.fingerprint_manager: detected fingerprints %v", appliedFingerprints) - return nil } -// Run starts the process of fingerprinting the node. It does an initial pass, -// identifying whitelisted and blacklisted fingerprints/drivers. Then, for -// those which require periotic checking, it starts a periodic process for -// each. -func (fp *FingerprintManager) Run() error { - // first, set up all fingerprints - cfg := fp.getConfig() - whitelistFingerprints := cfg.ReadStringListToMap("fingerprint.whitelist") - whitelistFingerprintsEnabled := len(whitelistFingerprints) > 0 - blacklistFingerprints := cfg.ReadStringListToMap("fingerprint.blacklist") - - fp.logger.Printf("[DEBUG] client.fingerprint_manager: built-in fingerprints: %v", fingerprint.BuiltinFingerprints()) +// fingerprintDriver is a temporary solution to move towards DriverInfo and +// away from annotating a node's attributes to demonstrate support for a +// particular driver. Takes the FingerprintResponse and converts it to the +// proper DriverInfo update and then sets the prefix attributes as well +func (fm *FingerprintManager) fingerprintDriver(name string, f fingerprint.Fingerprint) (bool, error) { + var response cstructs.FingerprintResponse - var availableFingerprints []string - var skippedFingerprints []string - for _, name := range fingerprint.BuiltinFingerprints() { - // Skip modules that are not in the whitelist if it is enabled. - if _, ok := whitelistFingerprints[name]; whitelistFingerprintsEnabled && !ok { - skippedFingerprints = append(skippedFingerprints, name) - continue - } - // Skip modules that are in the blacklist - if _, ok := blacklistFingerprints[name]; ok { - skippedFingerprints = append(skippedFingerprints, name) - continue - } + fm.nodeLock.Lock() + request := &cstructs.FingerprintRequest{Config: fm.getConfig(), Node: fm.node} + err := f.Fingerprint(request, &response) + fm.nodeLock.Unlock() - availableFingerprints = append(availableFingerprints, name) + if err != nil { + return false, err } - if err := fp.setupFingerprinters(availableFingerprints); err != nil { - return err + if node := fm.updateNodeAttributes(&response); node != nil { + fm.nodeLock.Lock() + fm.node = node + fm.nodeLock.Unlock() } - if len(skippedFingerprints) != 0 { - fp.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprint modules skipped due to white/blacklist: %v", skippedFingerprints) + // COMPAT: Remove in 0.9: As of Nomad 0.8 there is a temporary measure to + // update all driver attributes to its corresponding driver info object, + // as eventually all drivers will need to + // support this. Doing this so that we can enable this iteratively and also + // in a backwards compatible way, where node attributes for drivers will + // eventually be phased out. + strippedAttributes := make(map[string]string, 0) + for k, v := range response.Attributes { + copy := k + strings.Replace(copy, "driver.", "", 1) + strippedAttributes[k] = v } - // next, set up drivers - // Build the white/blacklists of drivers. - whitelistDrivers := cfg.ReadStringListToMap("driver.whitelist") - whitelistDriversEnabled := len(whitelistDrivers) > 0 - blacklistDrivers := cfg.ReadStringListToMap("driver.blacklist") - - var availDrivers []string - var skippedDrivers []string - - for name := range driver.BuiltinDrivers { - // Skip fingerprinting drivers that are not in the whitelist if it is - // enabled. - if _, ok := whitelistDrivers[name]; whitelistDriversEnabled && !ok { - skippedDrivers = append(skippedDrivers, name) - continue - } - // Skip fingerprinting drivers that are in the blacklist - if _, ok := blacklistDrivers[name]; ok { - skippedDrivers = append(skippedDrivers, name) - continue - } - - availDrivers = append(availDrivers, name) + di := &structs.DriverInfo{ + Attributes: strippedAttributes, + Detected: response.Detected, + } + if node := fm.updateNodeFromDriver(name, di, nil); node != nil { + fm.nodeLock.Lock() + fm.node = node + fm.nodeLock.Unlock() } - if err := fp.setupDrivers(availDrivers); err != nil { + return response.Detected, nil +} + +// runDriverHealthCheck checks the health of the specified resource. +func (fm *FingerprintManager) runDriverHealthCheck(name string, hc fingerprint.HealthCheck) error { + request := &cstructs.HealthCheckRequest{} + var response cstructs.HealthCheckResponse + if err := hc.HealthCheck(request, &response); err != nil { return err } - if len(skippedDrivers) > 0 { - fp.logger.Printf("[DEBUG] client.fingerprint_manager: drivers skipped due to white/blacklist: %v", skippedDrivers) + // Update the status of the node irregardless if there was an error- in the + // case of periodic health checks, an error will occur if a health check + // fails + if node := fm.updateNodeFromDriver(name, nil, response.Drivers[name]); node != nil { + fm.nodeLock.Lock() + fm.node = node + fm.nodeLock.Unlock() } + return nil } diff --git a/client/fingerprint_manager_test.go b/client/fingerprint_manager_test.go index 30f12e0256e..82cb2186f8c 100644 --- a/client/fingerprint_manager_test.go +++ b/client/fingerprint_manager_test.go @@ -2,11 +2,12 @@ package client import ( "fmt" + "log" + "os" "testing" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/driver" - "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" "github.com/stretchr/testify/require" ) @@ -16,29 +17,33 @@ func TestFingerprintManager_Run_MockDriver(t *testing.T) { t.Parallel() require := require.New(t) - node := &structs.Node{ - Attributes: make(map[string]string, 0), - Links: make(map[string]string, 0), - Resources: &structs.Resources{}, - } - testConfig := config.Config{Node: node} - testClient := &Client{config: &testConfig} - conf := config.DefaultConfig() + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) - getConfig := func() *config.Config { - return conf - } + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + }) + + testClient.logger = log.New(os.Stderr, "", log.LstdFlags) + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) fm := NewFingerprintManager( - getConfig, - node, - make(chan struct{}), + testClient.GetConfig, + testClient.config.Node, + testClient.shutdownCh, testClient.updateNodeFromFingerprint, + testClient.updateNodeFromDriver, testLogger(), ) err := fm.Run() require.Nil(err) + + node := testClient.config.Node + require.NotEqual("", node.Attributes["driver.mock_driver"]) } @@ -47,29 +52,33 @@ func TestFingerprintManager_Run_ResourcesFingerprint(t *testing.T) { t.Parallel() require := require.New(t) - node := &structs.Node{ - Attributes: make(map[string]string, 0), - Links: make(map[string]string, 0), - Resources: &structs.Resources{}, - } - testConfig := config.Config{Node: node} - testClient := &Client{config: &testConfig} + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) - conf := config.DefaultConfig() - getConfig := func() *config.Config { - return conf - } + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + }) + + testClient.logger = testLogger() + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) fm := NewFingerprintManager( - getConfig, - node, - make(chan struct{}), + testClient.GetConfig, + testClient.config.Node, + testClient.shutdownCh, testClient.updateNodeFromFingerprint, - testLogger(), + testClient.updateNodeFromDriver, + testClient.logger, ) err := fm.Run() require.Nil(err) + + node := testClient.config.Node + require.NotEqual(0, node.Resources.CPU) require.NotEqual(0, node.Resources.MemoryMB) require.NotZero(node.Resources.DiskMB) @@ -79,87 +88,203 @@ func TestFingerprintManager_Fingerprint_Run(t *testing.T) { t.Parallel() require := require.New(t) - node := &structs.Node{ - Attributes: make(map[string]string, 0), - Links: make(map[string]string, 0), - Resources: &structs.Resources{}, - } - testConfig := config.Config{Node: node} - testClient := &Client{config: &testConfig} + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) - conf := config.DefaultConfig() - conf.Options = map[string]string{"driver.raw_exec.enable": "true"} - getConfig := func() *config.Config { - return conf - } + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + }) + + testClient.logger = testLogger() + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) fm := NewFingerprintManager( - getConfig, - node, - make(chan struct{}), + testClient.GetConfig, + testClient.config.Node, + testClient.shutdownCh, testClient.updateNodeFromFingerprint, - testLogger(), + testClient.updateNodeFromDriver, + testClient.logger, ) err := fm.Run() require.Nil(err) + node := testClient.config.Node + require.NotEqual("", node.Attributes["driver.raw_exec"]) + require.True(node.Drivers["raw_exec"].Detected) + require.True(node.Drivers["raw_exec"].Healthy) } func TestFingerprintManager_Fingerprint_Periodic(t *testing.T) { t.Parallel() require := require.New(t) - node := &structs.Node{ - Attributes: make(map[string]string, 0), - Links: make(map[string]string, 0), - Resources: &structs.Resources{}, - } - testConfig := config.Config{Node: node} - testClient := &Client{config: &testConfig} + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + c.Options = map[string]string{ + "driver.raw_exec.enable": "1", + "test.shutdown_periodic_after": "true", + "test.shutdown_periodic_duration": "2", + } + }) + + testClient.logger = testLogger() + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) + + fm := NewFingerprintManager( + testClient.GetConfig, + testClient.config.Node, + testClient.shutdownCh, + testClient.updateNodeFromFingerprint, + testClient.updateNodeFromDriver, + testClient.logger, + ) + + err := fm.Run() + require.Nil(err) - conf := config.DefaultConfig() - conf.Options = map[string]string{ - "test.shutdown_periodic_after": "true", - "test.shutdown_periodic_duration": "3", + { + // Ensure the mock driver is registered and healthy on the client + testutil.WaitForResult(func() (bool, error) { + fm.nodeLock.Lock() + node := fm.node + defer fm.nodeLock.Unlock() + + mockDriverStatus := node.Attributes["driver.mock_driver"] + if mockDriverStatus == "" { + return false, fmt.Errorf("mock driver attribute should be set on the client") + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) } - getConfig := func() *config.Config { - return conf + // Ensure that the client fingerprinter eventually removes this attribute and + // marks the driver as unhealthy + { + testutil.WaitForResult(func() (bool, error) { + fm.nodeLock.Lock() + node := fm.node + defer fm.nodeLock.Unlock() + + mockDriverStatus := node.Attributes["driver.mock_driver"] + if mockDriverStatus != "" { + return false, fmt.Errorf("mock driver attribute should not be set on the client") + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) } +} - shutdownCh := make(chan struct{}) - defer (func() { - close(shutdownCh) - })() +// This is a temporary measure to check that a driver has both attributes on a +// node set as well as DriverInfo. +func TestFingerprintManager_HealthCheck_Driver(t *testing.T) { + t.Parallel() + require := require.New(t) + + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + c.Options = map[string]string{ + "driver.raw_exec.enable": "1", + "test.shutdown_periodic_after": "true", + "test.shutdown_periodic_duration": "2", + } + }) + + testClient.logger = testLogger() + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) fm := NewFingerprintManager( - getConfig, - node, - shutdownCh, + testClient.GetConfig, + testClient.config.Node, + testClient.shutdownCh, testClient.updateNodeFromFingerprint, - testLogger(), + testClient.updateNodeFromDriver, + testClient.logger, ) err := fm.Run() require.Nil(err) - // Ensure the mock driver is registered on the client + // Ensure the mock driver is registered and healthy on the client + testutil.WaitForResult(func() (bool, error) { + fm.nodeLock.Lock() + node := fm.node + defer fm.nodeLock.Unlock() + + mockDriverAttribute := node.Attributes["driver.mock_driver"] + if mockDriverAttribute == "" { + return false, fmt.Errorf("mock driver info should be set on the client attributes") + } + mockDriverInfo := node.Drivers["mock_driver"] + if mockDriverInfo == nil { + return false, fmt.Errorf("mock driver info should be set on the client") + } + if !mockDriverInfo.Healthy { + return false, fmt.Errorf("mock driver info should be healthy") + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + // Ensure that a default driver without health checks enabled is registered and healthy on the client testutil.WaitForResult(func() (bool, error) { - mockDriverStatus := node.Attributes["driver.mock_driver"] - if mockDriverStatus == "" { - return false, fmt.Errorf("mock driver attribute should be set on the client") + fm.nodeLock.Lock() + node := fm.node + defer fm.nodeLock.Unlock() + + rawExecAttribute := node.Attributes["driver.raw_exec"] + if rawExecAttribute == "" { + return false, fmt.Errorf("raw exec info should be set on the client attributes") + } + rawExecInfo := node.Drivers["raw_exec"] + if rawExecInfo == nil { + return false, fmt.Errorf("raw exec driver info should be set on the client") + } + if !rawExecInfo.Detected { + return false, fmt.Errorf("raw exec driver should be detected") } return true, nil }, func(err error) { t.Fatalf("err: %v", err) }) - // Ensure that the client fingerprinter eventually removes this attribute + // Ensure the mock driver is de-registered when it becomes unhealthy testutil.WaitForResult(func() (bool, error) { - mockDriverStatus := node.Attributes["driver.mock_driver"] - if mockDriverStatus != "" { - return false, fmt.Errorf("mock driver attribute should not be set on the client") + fm.nodeLock.Lock() + node := fm.node + defer fm.nodeLock.Unlock() + + mockDriverAttribute := node.Attributes["driver.mock_driver"] + if mockDriverAttribute == "" { + return false, fmt.Errorf("mock driver info should be set on the client attributes") + } + mockDriverInfo := node.Drivers["mock_driver"] + if mockDriverInfo == nil { + return false, fmt.Errorf("mock driver info should be set on the client") + } + if !mockDriverInfo.Healthy { + return false, fmt.Errorf("mock driver info should not be healthy") } return true, nil }, func(err error) { @@ -167,39 +292,146 @@ func TestFingerprintManager_Fingerprint_Periodic(t *testing.T) { }) } -func TestFingerprintManager_Run_InWhitelist(t *testing.T) { +func TestFingerprintManager_HealthCheck_Periodic(t *testing.T) { t.Parallel() require := require.New(t) - node := &structs.Node{ - Attributes: make(map[string]string, 0), - Links: make(map[string]string, 0), - Resources: &structs.Resources{}, - } - testConfig := config.Config{Node: node} - testClient := &Client{config: &testConfig} + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + c.Options = map[string]string{ + "driver.raw_exec.enable": "1", + "test.shutdown_periodic_after": "true", + "test.shutdown_periodic_duration": "2", + } + }) + + testClient.logger = testLogger() + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) + + fm := NewFingerprintManager( + testClient.GetConfig, + testClient.config.Node, + testClient.shutdownCh, + testClient.updateNodeFromFingerprint, + testClient.updateNodeFromDriver, + testClient.logger, + ) - conf := config.DefaultConfig() - conf.Options = map[string]string{"fingerprint.whitelist": " arch,cpu,memory,network,storage,foo,bar "} - getConfig := func() *config.Config { - return conf + err := fm.Run() + require.Nil(err) + + { + // Ensure the mock driver is registered and healthy on the client + testutil.WaitForResult(func() (bool, error) { + fm.nodeLock.Lock() + node := fm.node + defer fm.nodeLock.Unlock() + + mockDriverInfo := node.Drivers["mock_driver"] + if mockDriverInfo == nil { + return false, fmt.Errorf("mock driver info should be set on the client") + } + if !mockDriverInfo.Detected { + return false, fmt.Errorf("mock driver info should be detected") + } + if !mockDriverInfo.Healthy { + return false, fmt.Errorf("mock driver info should be healthy") + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + } + { + // Ensure that the client health check eventually removes this attribute and + // marks the driver as unhealthy + testutil.WaitForResult(func() (bool, error) { + fm.nodeLock.Lock() + node := fm.node + defer fm.nodeLock.Unlock() + + mockDriverInfo := node.Drivers["mock_driver"] + if mockDriverInfo == nil { + return false, fmt.Errorf("mock driver info should be set on the client") + } + if !mockDriverInfo.Detected { + return false, fmt.Errorf("mock driver info should be detected") + } + if !mockDriverInfo.Healthy { + return false, fmt.Errorf("mock driver info should be healthy") + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + } + { + // Ensure that the client health check eventually removes this attribute and + // marks the driver as unhealthy + testutil.WaitForResult(func() (bool, error) { + fm.nodeLock.Lock() + node := fm.node + defer fm.nodeLock.Unlock() + + mockDriverInfo := node.Drivers["mock_driver"] + if mockDriverInfo == nil { + return false, fmt.Errorf("mock driver info should be set on the client") + } + if mockDriverInfo.Detected { + return false, fmt.Errorf("mock driver should be detected") + } + if mockDriverInfo.Healthy { + return false, fmt.Errorf("mock driver should not be healthy") + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) } +} - shutdownCh := make(chan struct{}) - defer (func() { - close(shutdownCh) - })() +func TestFimgerprintManager_Run_InWhitelist(t *testing.T) { + t.Parallel() + require := require.New(t) + + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + c.Options = map[string]string{ + "driver.raw_exec.enable": "1", + "test.shutdown_periodic_after": "true", + "test.shutdown_periodic_duration": "2", + } + }) + + testClient.logger = testLogger() + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) fm := NewFingerprintManager( - getConfig, - node, - shutdownCh, + testClient.GetConfig, + testClient.config.Node, + testClient.shutdownCh, testClient.updateNodeFromFingerprint, - testLogger(), + testClient.updateNodeFromDriver, + testClient.logger, ) err := fm.Run() require.Nil(err) + + node := testClient.config.Node + require.NotEqual(node.Attributes["cpu.frequency"], "") } @@ -207,36 +439,37 @@ func TestFingerprintManager_Run_InBlacklist(t *testing.T) { t.Parallel() require := require.New(t) - node := &structs.Node{ - Attributes: make(map[string]string, 0), - Links: make(map[string]string, 0), - Resources: &structs.Resources{}, - } - testConfig := config.Config{Node: node} - testClient := &Client{config: &testConfig} - - conf := config.DefaultConfig() - conf.Options = map[string]string{"fingerprint.whitelist": " arch,memory,foo,bar "} - conf.Options = map[string]string{"fingerprint.blacklist": " cpu "} - getConfig := func() *config.Config { - return conf - } + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) - shutdownCh := make(chan struct{}) - defer (func() { - close(shutdownCh) - })() + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + c.Options = map[string]string{ + "fingerprint.whitelist": " arch,memory,foo,bar ", + "fingerprint.blacklist": " cpu ", + } + }) + + testClient.logger = testLogger() + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) fm := NewFingerprintManager( - getConfig, - node, - shutdownCh, + testClient.GetConfig, + testClient.config.Node, + testClient.shutdownCh, testClient.updateNodeFromFingerprint, - testLogger(), + testClient.updateNodeFromDriver, + testClient.logger, ) err := fm.Run() require.Nil(err) + + node := testClient.config.Node + require.Equal(node.Attributes["cpu.frequency"], "") require.NotEqual(node.Attributes["memory.totalbytes"], "") } @@ -245,36 +478,37 @@ func TestFingerprintManager_Run_Combination(t *testing.T) { t.Parallel() require := require.New(t) - node := &structs.Node{ - Attributes: make(map[string]string, 0), - Links: make(map[string]string, 0), - Resources: &structs.Resources{}, - } - testConfig := config.Config{Node: node} - testClient := &Client{config: &testConfig} - - conf := config.DefaultConfig() - conf.Options = map[string]string{"fingerprint.whitelist": " arch,cpu,memory,foo,bar "} - conf.Options = map[string]string{"fingerprint.blacklist": " memory,nomad "} - getConfig := func() *config.Config { - return conf - } + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + c.Options = map[string]string{ + "fingerprint.whitelist": " arch,cpu,memory,foo,bar ", + "fingerprint.blacklist": " memory,nomad ", + } + }) - shutdownCh := make(chan struct{}) - defer (func() { - close(shutdownCh) - })() + testClient.logger = testLogger() + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) fm := NewFingerprintManager( - getConfig, - node, - shutdownCh, + testClient.GetConfig, + testClient.config.Node, + testClient.shutdownCh, testClient.updateNodeFromFingerprint, - testLogger(), + testClient.updateNodeFromDriver, + testClient.logger, ) err := fm.Run() require.Nil(err) + + node := testClient.config.Node + require.NotEqual(node.Attributes["cpu.frequency"], "") require.NotEqual(node.Attributes["cpu.arch"], "") require.Equal(node.Attributes["memory.totalbytes"], "") @@ -285,38 +519,36 @@ func TestFingerprintManager_Run_WhitelistDrivers(t *testing.T) { t.Parallel() require := require.New(t) - node := &structs.Node{ - Attributes: make(map[string]string, 0), - Links: make(map[string]string, 0), - Resources: &structs.Resources{}, - } - testConfig := config.Config{Node: node} - testClient := &Client{config: &testConfig} + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) - conf := config.DefaultConfig() - conf.Options = map[string]string{ - "driver.raw_exec.enable": "1", - "driver.whitelist": " raw_exec , foo ", - } - getConfig := func() *config.Config { - return conf - } + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + c.Options = map[string]string{ + "driver.raw_exec.enable": "1", + "driver.whitelist": " raw_exec , foo ", + } + }) - shutdownCh := make(chan struct{}) - defer (func() { - close(shutdownCh) - })() + testClient.logger = testLogger() + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) fm := NewFingerprintManager( - getConfig, - node, - shutdownCh, + testClient.GetConfig, + testClient.config.Node, + testClient.shutdownCh, testClient.updateNodeFromFingerprint, - testLogger(), + testClient.updateNodeFromDriver, + testClient.logger, ) err := fm.Run() require.Nil(err) + + node := testClient.config.Node require.NotEqual(node.Attributes["driver.raw_exec"], "") } @@ -324,37 +556,36 @@ func TestFingerprintManager_Run_AllDriversBlacklisted(t *testing.T) { t.Parallel() require := require.New(t) - node := &structs.Node{ - Attributes: make(map[string]string, 0), - Links: make(map[string]string, 0), - Resources: &structs.Resources{}, - } - testConfig := config.Config{Node: node} - testClient := &Client{config: &testConfig} + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) - conf := config.DefaultConfig() - conf.Options = map[string]string{ - "driver.whitelist": " foo,bar,baz ", - } - getConfig := func() *config.Config { - return conf - } + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + c.Options = map[string]string{ + "driver.whitelist": " foo,bar,baz ", + } + }) - shutdownCh := make(chan struct{}) - defer (func() { - close(shutdownCh) - })() + testClient.logger = testLogger() + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) fm := NewFingerprintManager( - getConfig, - node, - shutdownCh, + testClient.GetConfig, + testClient.config.Node, + testClient.shutdownCh, testClient.updateNodeFromFingerprint, - testLogger(), + testClient.updateNodeFromDriver, + testClient.logger, ) err := fm.Run() require.Nil(err) + + node := testClient.config.Node + require.Equal(node.Attributes["driver.raw_exec"], "") require.Equal(node.Attributes["driver.exec"], "") require.Equal(node.Attributes["driver.docker"], "") @@ -364,39 +595,38 @@ func TestFingerprintManager_Run_DriversWhiteListBlacklistCombination(t *testing. t.Parallel() require := require.New(t) - node := &structs.Node{ - Attributes: make(map[string]string, 0), - Links: make(map[string]string, 0), - Resources: &structs.Resources{}, - } - testConfig := config.Config{Node: node} - testClient := &Client{config: &testConfig} - - conf := config.DefaultConfig() - conf.Options = map[string]string{ - "driver.raw_exec.enable": "1", - "driver.whitelist": " raw_exec,exec,foo,bar,baz ", - "driver.blacklist": " exec,foo,bar,baz ", - } - getConfig := func() *config.Config { - return conf - } + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + c.Options = map[string]string{ + "driver.raw_exec.enable": "1", + "driver.whitelist": " raw_exec,exec,foo,bar,baz ", + "driver.blacklist": " exec,foo,bar,baz ", + } + }) - shutdownCh := make(chan struct{}) - defer (func() { - close(shutdownCh) - })() + testClient.logger = testLogger() + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) fm := NewFingerprintManager( - getConfig, - node, - shutdownCh, + testClient.GetConfig, + testClient.config.Node, + testClient.shutdownCh, testClient.updateNodeFromFingerprint, - testLogger(), + testClient.updateNodeFromDriver, + testClient.logger, ) err := fm.Run() require.Nil(err) + + node := testClient.config.Node + require.NotEqual(node.Attributes["driver.raw_exec"], "") require.Equal(node.Attributes["driver.exec"], "") require.Equal(node.Attributes["foo"], "") @@ -408,36 +638,38 @@ func TestFingerprintManager_Run_DriversInBlacklist(t *testing.T) { t.Parallel() require := require.New(t) - node := &structs.Node{ - Attributes: make(map[string]string, 0), - Links: make(map[string]string, 0), - Resources: &structs.Resources{}, - } - conf := config.DefaultConfig() - conf.Options = map[string]string{ - "driver.raw_exec.enable": "1", - "driver.whitelist": " raw_exec,foo,bar,baz ", - "driver.blacklist": " exec,foo,bar,baz ", - } - conf.Node = node - - testClient := &Client{config: conf} + s1, serverAddr := testServer(t, nil) + defer s1.Shutdown() + testutil.WaitForLeader(t, s1.RPC) + + testClient := TestClient(t, func(c *config.Config) { + c.RPCHandler = s1 + c.Servers = []string{serverAddr} + c.Options = map[string]string{ + "driver.raw_exec.enable": "1", + "driver.whitelist": " raw_exec,foo,bar,baz ", + "driver.blacklist": " exec,foo,bar,baz ", + } + }) - shutdownCh := make(chan struct{}) - defer (func() { - close(shutdownCh) - })() + testClient.logger = testLogger() + defer testClient.Shutdown() + waitTilNodeReady(testClient, t) fm := NewFingerprintManager( testClient.GetConfig, - node, - shutdownCh, + testClient.config.Node, + testClient.shutdownCh, testClient.updateNodeFromFingerprint, - testLogger(), + testClient.updateNodeFromDriver, + testClient.logger, ) err := fm.Run() require.Nil(err) + + node := testClient.config.Node + require.NotEqual(node.Attributes["driver.raw_exec"], "") require.Equal(node.Attributes["driver.exec"], "") } diff --git a/client/structs/structs.go b/client/structs/structs.go index c038d1ff084..af28b55ca6c 100644 --- a/client/structs/structs.go +++ b/client/structs/structs.go @@ -405,3 +405,31 @@ func (f *FingerprintResponse) RemoveLink(name string) { f.Links[name] = "" } + +// HealthCheckRequest is the request type for a type that fulfils the Health +// Check interface +type HealthCheckRequest struct{} + +// HealthCheckResponse is the response type for a type that fulfills the Health +// Check interface +type HealthCheckResponse struct { + // Drivers is a map of driver names to current driver information + Drivers map[string]*structs.DriverInfo +} + +type HealthCheckIntervalRequest struct{} +type HealthCheckIntervalResponse struct { + Eligible bool + Period time.Duration +} + +// AddDriverInfo adds information about a driver to the fingerprint response. +// If the Drivers field has not yet been initialized, it does so here. +func (h *HealthCheckResponse) AddDriverInfo(name string, driverInfo *structs.DriverInfo) { + // initialize Drivers if it has not been already + if h.Drivers == nil { + h.Drivers = make(map[string]*structs.DriverInfo, 0) + } + + h.Drivers[name] = driverInfo +} diff --git a/nomad/structs/node.go b/nomad/structs/node.go new file mode 100644 index 00000000000..a4eb91e719c --- /dev/null +++ b/nomad/structs/node.go @@ -0,0 +1,49 @@ +package structs + +import ( + "time" +) + +// DriverInfo is the current state of a single driver. This is updated +// regularly as driver health changes on the node. +type DriverInfo struct { + Attributes map[string]string + Detected bool + Healthy bool + HealthDescription string + UpdateTime time.Time +} + +// MergeHealthCheck merges information from a health check for a drier into a +// node's driver info +func (di *DriverInfo) MergeHealthCheck(other *DriverInfo) { + di.Healthy = other.Healthy + di.HealthDescription = other.HealthDescription + di.UpdateTime = other.UpdateTime +} + +// MergeFingerprint merges information from fingerprinting a node for a driver +// into a node's driver info for that driver. +func (di *DriverInfo) MergeFingerprintInfo(other *DriverInfo) { + di.Detected = other.Detected + di.Attributes = other.Attributes +} + +// DriverInfo determines if two driver info objects are equal..As this is used +// in the process of health checking, we only check the fields that are +// computed by the health checker. In the future, this will be merged. +func (di *DriverInfo) HealthCheckEquals(other *DriverInfo) bool { + if di == nil && other == nil { + return true + } + + if di.Healthy != other.Healthy { + return false + } + + if di.HealthDescription != other.HealthDescription { + return false + } + + return true +} diff --git a/nomad/structs/node_test.go b/nomad/structs/node_test.go new file mode 100644 index 00000000000..aee21accb01 --- /dev/null +++ b/nomad/structs/node_test.go @@ -0,0 +1,62 @@ +package structs + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestDriverInfoEquals(t *testing.T) { + require := require.New(t) + var driverInfoTest = []struct { + input []*DriverInfo + expected bool + errorMsg string + }{ + { + []*DriverInfo{ + { + Healthy: true, + }, + { + Healthy: false, + }, + }, + false, + "Different healthy values should not be equal.", + }, + { + []*DriverInfo{ + { + HealthDescription: "not running", + }, + { + HealthDescription: "running", + }, + }, + false, + "Different health description values should not be equal.", + }, + { + []*DriverInfo{ + { + Detected: false, + Healthy: true, + HealthDescription: "This driver is ok", + }, + { + Detected: true, + Healthy: true, + HealthDescription: "This driver is ok", + }, + }, + true, + "Same health check should be equal", + }, + } + for _, testCase := range driverInfoTest { + first := testCase.input[0] + second := testCase.input[1] + require.Equal(testCase.expected, first.HealthCheckEquals(second), testCase.errorMsg) + } +} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a50df270c8d..54c89fb95e2 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1241,6 +1241,9 @@ type Node struct { // retaining only MaxRetainedNodeEvents number at a time Events []*NodeEvent + // Drivers is a map of driver names to current driver information + Drivers map[string]*DriverInfo + // Raft Indexes CreateIndex uint64 ModifyIndex uint64 diff --git a/scheduler/feasible.go b/scheduler/feasible.go index 9ba83195c95..4b1a995710b 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -129,6 +129,22 @@ func (c *DriverChecker) Feasible(option *structs.Node) bool { func (c *DriverChecker) hasDrivers(option *structs.Node) bool { for driver := range c.drivers { driverStr := fmt.Sprintf("driver.%s", driver) + + // COMPAT: Remove in 0.10: As of Nomad 0.8, nodes have a DriverInfo that + // corresponds with every driver. As a Nomad server might be on a later + // version than a Nomad client, we need to check for compatibility here + // to verify the client supports this. + if driverInfo, ok := option.Drivers[driver]; ok { + if driverInfo == nil { + c.ctx.Logger(). + Printf("[WARN] scheduler.DriverChecker: node %v has no driver info set for %v", + option.ID, driver) + return false + } + + return driverInfo.Detected && driverInfo.Healthy + } + value, ok := option.Attributes[driverStr] if !ok { return false diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go index a22f66e1e76..7b2129e1c78 100644 --- a/scheduler/feasible_test.go +++ b/scheduler/feasible_test.go @@ -4,10 +4,12 @@ import ( "fmt" "reflect" "testing" + "time" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" + "github.com/stretchr/testify/require" ) func TestStaticIterator_Reset(t *testing.T) { @@ -125,6 +127,69 @@ func TestDriverChecker(t *testing.T) { } } +func Test_HealthChecks(t *testing.T) { + require := require.New(t) + _, ctx := testContext(t) + + nodes := []*structs.Node{ + mock.Node(), + mock.Node(), + mock.Node(), + } + for _, e := range nodes { + e.Drivers = make(map[string]*structs.DriverInfo) + } + nodes[0].Attributes["driver.foo"] = "1" + nodes[0].Drivers["foo"] = &structs.DriverInfo{ + Detected: true, + Healthy: true, + HealthDescription: "running", + UpdateTime: time.Now(), + } + nodes[1].Attributes["driver.bar"] = "1" + nodes[1].Drivers["bar"] = &structs.DriverInfo{ + Detected: true, + Healthy: false, + HealthDescription: "not running", + UpdateTime: time.Now(), + } + nodes[2].Attributes["driver.baz"] = "0" + nodes[2].Drivers["baz"] = &structs.DriverInfo{ + Detected: false, + Healthy: false, + HealthDescription: "not running", + UpdateTime: time.Now(), + } + + testDrivers := []string{"foo", "bar", "baz"} + cases := []struct { + Node *structs.Node + Result bool + }{ + { + Node: nodes[0], + Result: true, + }, + { + Node: nodes[1], + Result: false, + }, + { + Node: nodes[2], + Result: false, + }, + } + + for i, c := range cases { + drivers := map[string]struct{}{ + testDrivers[i]: {}, + } + checker := NewDriverChecker(ctx, drivers) + act := checker.Feasible(c.Node) + require.Equal(act, c.Result) + } +} + func TestConstraintChecker(t *testing.T) { _, ctx := testContext(t) nodes := []*structs.Node{