Skip to content

Commit

Permalink
add concept of health checks to fingerprinters and nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
chelseakomlo committed Feb 13, 2018
1 parent 6820c96 commit 9e06dd9
Show file tree
Hide file tree
Showing 9 changed files with 344 additions and 17 deletions.
18 changes: 17 additions & 1 deletion client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
}

fingerprintManager := NewFingerprintManager(c.GetConfig, c.config.Node,
c.shutdownCh, c.updateNodeFromFingerprint, c.logger)
c.shutdownCh, c.updateNodeFromFingerprint, c.updateNodeFromHealthCheck, c.logger)

// Fingerprint the node and scan for drivers
if err := fingerprintManager.Run(); err != nil {
Expand Down Expand Up @@ -856,6 +856,9 @@ func (c *Client) setupNode() error {
if node.Links == nil {
node.Links = make(map[string]string)
}
if node.Drivers == nil {
node.Drivers = make(map[string]*structs.DriverInfo)
}
if node.Meta == nil {
node.Meta = make(map[string]string)
}
Expand Down Expand Up @@ -948,6 +951,19 @@ func (c *Client) updateNodeFromFingerprint(response *cstructs.FingerprintRespons
if response.Resources != nil {
c.config.Node.Resources.Merge(response.Resources)
}

return c.config.Node
}

// updateNodeFromHealthCheck copies the driver health information carried by a
// health check response onto the client's node and returns that node. The
// client's config lock is held for the whole update.
func (c *Client) updateNodeFromHealthCheck(response *cstructs.HealthCheckResponse) *structs.Node {
	c.configLock.Lock()
	defer c.configLock.Unlock()

	node := c.config.Node

	// Every reported driver overwrites whatever was previously stored under
	// the same name; drivers absent from the response are left untouched.
	for driver, info := range response.Drivers {
		node.Drivers[driver] = info
	}

	return node
}

Expand Down
28 changes: 27 additions & 1 deletion client/driver/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,6 @@ func (d *DockerDriver) Fingerprint(req *cstructs.FingerprintRequest, resp *cstru
d.logger.Printf("[INFO] driver.docker: failed to initialize client: %s", err)
}
d.fingerprintSuccess = helper.BoolToPtr(false)
resp.RemoveAttribute(dockerDriverAttr)
return nil
}

Expand Down Expand Up @@ -552,6 +551,33 @@ func (d *DockerDriver) Fingerprint(req *cstructs.FingerprintRequest, resp *cstru
return nil
}

// Check probes the Docker daemon by listing containers and records the outcome
// in resp under the "driver.docker" key.
//
// An unresponsive daemon is a health *result*, not a failure of the check
// itself, so it is reported through resp and nil is returned. Returning the
// error instead would cause FingerprintManager.healthCheck to bail out before
// applying resp, and the node would never be marked unhealthy.
func (d *DockerDriver) Check(req *cstructs.HealthCheckRequest, resp *cstructs.HealthCheckResponse) error {
	const driverKey = "driver.docker"

	if _, err := client.ListContainers(docker.ListContainersOptions{}); err != nil {
		// Include the underlying error so operators can tell why the daemon
		// did not answer.
		d.logger.Printf("[WARN] driver.docker: docker driver is available but is unresponsive to `docker ps`: %v", err)
		resp.AddDriverInfo(driverKey, &structs.DriverInfo{
			HealthDescription: "Docker driver is available but unresponsive",
			// Stamp the result after the probe so the time reflects when
			// the outcome was actually observed.
			UpdateTime: time.Now(),
		})
		return nil
	}

	d.logger.Printf("[DEBUG] driver.docker: docker driver is available and is responsive to `docker ps`")
	resp.AddDriverInfo(driverKey, &structs.DriverInfo{
		Healthy:           true,
		HealthDescription: "Docker driver is available and responsive",
		UpdateTime:        time.Now(),
	})
	return nil
}

// CheckHealthPeriodic reports that the Docker driver should be health checked
// periodically, and that the check should run at a one minute interval.
func (d *DockerDriver) CheckHealthPeriodic() (bool, time.Duration) {
	const interval = time.Minute
	return true, interval
}

// Validate is used to validate the driver configuration
func (d *DockerDriver) Validate(config map[string]interface{}) error {
fd := &fields.FieldData{
Expand Down
43 changes: 43 additions & 0 deletions client/driver/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/env"
"github.com/hashicorp/nomad/client/fingerprint"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/client/testutil"
"github.com/hashicorp/nomad/helper/uuid"
Expand Down Expand Up @@ -164,6 +165,7 @@ func TestDockerDriver_Fingerprint(t *testing.T) {
if !tu.IsTravis() {
t.Parallel()
}

ctx := testDockerDriverContexts(t, &structs.Task{Name: "foo", Driver: "docker", Resources: basicResources})
//ctx.DriverCtx.config.Options = map[string]string{"docker.cleanup.image": "false"}
defer ctx.AllocDir.Destroy()
Expand Down Expand Up @@ -227,6 +229,7 @@ func TestDockerDriver_Fingerprint_Bridge(t *testing.T) {

request := &cstructs.FingerprintRequest{Config: conf, Node: conf.Node}
var response cstructs.FingerprintResponse

err = dd.Fingerprint(request, &response)
if err != nil {
t.Fatalf("error fingerprinting docker: %v", err)
Expand All @@ -251,6 +254,46 @@ func TestDockerDriver_Fingerprint_Bridge(t *testing.T) {
t.Logf("docker bridge ip: %q", attributes["driver.docker.bridge_ip"])
}

// TestDockerDriver_Check_DockerHealthStatus verifies that the Docker driver
// satisfies fingerprint.HealthCheck and that its Check reports a healthy
// "driver.docker" entry when a Docker daemon is reachable.
func TestDockerDriver_Check_DockerHealthStatus(t *testing.T) {
	if !tu.IsTravis() {
		t.Parallel()
	}
	if !testutil.DockerIsConnected(t) {
		t.Skip("requires Docker")
	}
	if runtime.GOOS != "linux" {
		t.Skip("expect only on linux")
	}

	must := require.New(t)

	// Precondition only: the docker0 bridge must resolve to an address. The
	// address itself is not used by the assertions below. This lookup may be
	// fragile; revisit it if the test proves flaky.
	bridgeIP, err := sockaddr.GetInterfaceIP("docker0")
	switch {
	case err != nil:
		t.Fatalf("unable to get ip for docker0: %v", err)
	case bridgeIP == "":
		t.Fatalf("unable to get ip for docker bridge")
	}

	conf := testConfig(t)
	conf.Node = mock.Node()
	driver := NewDockerDriver(NewDriverContext("", "", conf, conf.Node, testLogger(), nil))

	// The driver must expose the health check interface.
	checker, ok := driver.(fingerprint.HealthCheck)
	must.True(ok)

	var response cstructs.HealthCheckResponse
	must.Nil(checker.Check(&cstructs.HealthCheckRequest{}, &response))

	info := response.Drivers["driver.docker"]
	must.NotNil(info)
	must.True(info.Healthy)
}

func TestDockerDriver_StartOpen_Wait(t *testing.T) {
if !tu.IsTravis() {
t.Parallel()
Expand Down
35 changes: 33 additions & 2 deletions client/driver/mock_driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ const (
// to "stop" a previously functioning driver after the specified duration
// (specified in seconds) for testing of periodic drivers and fingerprinters.
ShutdownPeriodicDuration = "test.shutdown_periodic_duration"

mockDriverName = "driver.mock_driver"
)

// Add the mock driver to the list of builtin drivers
Expand Down Expand Up @@ -225,14 +227,43 @@ func (m *MockDriver) Fingerprint(req *cstructs.FingerprintRequest, resp *cstruct
// current time is after the time which the node should shut down, simulate
// driver failure
case !m.shutdownFingerprintTime.IsZero() && time.Now().After(m.shutdownFingerprintTime):
resp.RemoveAttribute("driver.mock_driver")
resp.RemoveAttribute(mockDriverName)
default:
resp.AddAttribute("driver.mock_driver", "1")
resp.AddAttribute(mockDriverName, "1")
resp.Detected = true
}
return nil
}

// Check implements the HealthCheck interface for the mock driver. The driver
// is reported as not healthy once a non-zero shutdownFingerprintTime has
// passed, and as healthy otherwise. The result is stored in resp under
// mockDriverName; the returned error is always nil.
func (m *MockDriver) Check(req *cstructs.HealthCheckRequest, resp *cstructs.HealthCheckResponse) error {
	shutDown := !m.shutdownFingerprintTime.IsZero() && time.Now().After(m.shutdownFingerprintTime)

	info := &structs.DriverInfo{
		Healthy:           !shutDown,
		HealthDescription: "running",
		UpdateTime:        time.Now(),
	}
	if shutDown {
		info.HealthDescription = "not running"
	}

	resp.AddDriverInfo(mockDriverName, info)
	return nil
}

// CheckHealthPeriodic implements the HealthCheck interface for the mock
// driver. The boolean result indicates that the driver should be health
// checked periodically, and the duration is the interval between checks (one
// second).
func (m *MockDriver) CheckHealthPeriodic() (bool, time.Duration) {
	const interval = time.Second
	return true, interval
}

// MockDriverHandle is a driver handler which supervises a mock task
type mockDriverHandle struct {
taskName string
Expand Down
15 changes: 15 additions & 0 deletions client/fingerprint/fingerprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,21 @@ func NewFingerprint(name string, logger *log.Logger) (Fingerprint, error) {
// Factory is used to instantiate a new Fingerprint
type Factory func(*log.Logger) Fingerprint

// HealthCheck is used for doing periodic health checks. On a given time
// interval, a health check will be called by the fingerprint manager of the
// node.
type HealthCheck interface {
// Check runs the health check and records its outcome in the supplied
// response, which is then used to update properties of the node. A non-nil
// error indicates that the check itself could not be completed.
Check(*cstructs.HealthCheckRequest, *cstructs.HealthCheckResponse) error

// CheckHealthPeriodic is a mechanism for the health checker to indicate that
// it should be run periodically. The return value is a boolean indicating
// whether it should be done periodically, and the time interval at which
// this check should happen.
CheckHealthPeriodic() (bool, time.Duration)
}

// Fingerprint is used for doing "fingerprinting" of the
// host to automatically determine attributes, resources,
// and metadata about it. Each of these is a heuristic, and
Expand Down
73 changes: 62 additions & 11 deletions client/fingerprint_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ type FingerprintManager struct {

// updateNode is a callback to the client to update the state of its
// associated node
updateNode func(*cstructs.FingerprintResponse) *structs.Node
logger *log.Logger
updateNode func(*cstructs.FingerprintResponse) *structs.Node
updateHealthCheck func(*cstructs.HealthCheckResponse) *structs.Node
logger *log.Logger
}

// NewFingerprintManager is a constructor that creates and returns an instance
Expand All @@ -32,18 +33,20 @@ func NewFingerprintManager(getConfig func() *config.Config,
node *structs.Node,
shutdownCh chan struct{},
updateNode func(*cstructs.FingerprintResponse) *structs.Node,
updateHealthCheck func(*cstructs.HealthCheckResponse) *structs.Node,
logger *log.Logger) *FingerprintManager {
return &FingerprintManager{
getConfig: getConfig,
updateNode: updateNode,
node: node,
shutdownCh: shutdownCh,
logger: logger,
getConfig: getConfig,
updateNode: updateNode,
updateHealthCheck: updateHealthCheck,
node: node,
shutdownCh: shutdownCh,
logger: logger,
}
}

// run runs each fingerprinter individually on an ongoing basis
func (fm *FingerprintManager) run(f fingerprint.Fingerprint, period time.Duration, name string) {
// runFingerprint runs each fingerprinter individually on an ongoing basis
func (fm *FingerprintManager) runFingerprint(f fingerprint.Fingerprint, period time.Duration, name string) {
fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting %s every %v", name, period)

for {
Expand All @@ -61,6 +64,25 @@ func (fm *FingerprintManager) run(f fingerprint.Fingerprint, period time.Duratio
}
}

// runHealthCheck runs a single health check on an ongoing basis until the
// manager's shutdown channel is closed. It is intended to be run in its own
// goroutine, one per health checker.
//
// hc is the checker to invoke, period is the delay between the end of one
// check and the start of the next, and name identifies the checker in log
// output. A failed check is logged and does not stop the loop.
func (fm *FingerprintManager) runHealthCheck(hc fingerprint.HealthCheck, period time.Duration, name string) {
	fm.logger.Printf("[DEBUG] client.fingerprint_manager: healthchecking %s every %v", name, period)

	// A single reusable timer avoids allocating a new one on every iteration
	// and guarantees it is released as soon as the loop exits on shutdown.
	timer := time.NewTimer(period)
	defer timer.Stop()

	for {
		select {
		case <-timer.C:
			if err := fm.healthCheck(name, hc); err != nil {
				fm.logger.Printf("[DEBUG] client.fingerprint_manager: health checking for %v failed: %+v", name, err)
			}

			// The timer has fired and its channel has just been drained, so
			// Reset is safe here. Re-arming after the check preserves the
			// "period after completion" cadence.
			timer.Reset(period)

		case <-fm.shutdownCh:
			return
		}
	}
}

// setupDrivers is used to fingerprint the node to see if these drivers are
// supported
func (fm *FingerprintManager) setupDrivers(drivers []string) error {
Expand All @@ -86,7 +108,13 @@ func (fm *FingerprintManager) setupDrivers(drivers []string) error {

p, period := d.Periodic()
if p {
go fm.run(d, period, name)
go fm.runFingerprint(d, period, name)
}

if hc, ok := d.(fingerprint.HealthCheck); ok {
if checkPeriodic, interval := hc.CheckHealthPeriodic(); checkPeriodic {
go fm.runHealthCheck(hc, interval, name)
}
}
}

Expand All @@ -113,6 +141,23 @@ func (fm *FingerprintManager) fingerprint(name string, f fingerprint.Fingerprint
return response.Detected, nil
}

// healthCheck runs the given health check once. If the check returns an error
// it is passed back to the caller and the node is left untouched. Otherwise
// the response is handed to the updateHealthCheck callback and, when that
// callback returns a non-nil node, the manager's node is replaced with it
// while holding the node lock.
func (fm *FingerprintManager) healthCheck(name string, hc fingerprint.HealthCheck) error {
	response := new(cstructs.HealthCheckResponse)
	err := hc.Check(&cstructs.HealthCheckRequest{}, response)
	if err != nil {
		return err
	}

	fm.nodeLock.Lock()
	defer fm.nodeLock.Unlock()

	if updated := fm.updateHealthCheck(response); updated != nil {
		fm.node = updated
	}
	return nil
}

// setupFingerprinters is used to fingerprint the node to see if these attributes are
// supported
func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error {
Expand All @@ -138,7 +183,13 @@ func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error {

p, period := f.Periodic()
if p {
go fm.run(f, period, name)
go fm.runFingerprint(f, period, name)
}

if hc, ok := f.(fingerprint.HealthCheck); ok {
if checkPeriodic, interval := hc.CheckHealthPeriodic(); checkPeriodic {
go fm.runHealthCheck(hc, interval, name)
}
}
}

Expand Down
Loading

0 comments on commit 9e06dd9

Please sign in to comment.