diff --git a/client/client.go b/client/client.go index 46d8bd1d706..35adbc53436 100644 --- a/client/client.go +++ b/client/client.go @@ -259,7 +259,13 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic return nil, fmt.Errorf("node setup failed: %v", err) } - fingerprintManager := NewFingerprintManager(c.GetConfig, c.config.Node, + // Store the config copy before restoring state but after it has been + // initialized. + c.configLock.Lock() + c.configCopy = c.config.Copy() + c.configLock.Unlock() + + fingerprintManager := NewFingerprintManager(c.GetConfig, c.configCopy.Node, c.shutdownCh, c.updateNodeFromFingerprint, c.updateNodeFromDriver, c.logger) @@ -271,12 +277,6 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic // Setup the reserved resources c.reservePorts() - // Store the config copy before restoring state but after it has been - // initialized. - c.configLock.Lock() - c.configCopy = c.config.Copy() - c.configLock.Unlock() - // Set the preconfigured list of static servers c.configLock.RLock() if len(c.configCopy.Servers) > 0 { @@ -437,7 +437,7 @@ func (c *Client) Leave() error { func (c *Client) GetConfig() *config.Config { c.configLock.Lock() defer c.configLock.Unlock() - return c.config + return c.configCopy } // Datacenter returns the datacenter for the given client @@ -726,7 +726,7 @@ func (c *Client) restoreState() error { watcher := noopPrevAlloc{} c.configLock.RLock() - ar := NewAllocRunner(c.logger, c.configCopy, c.stateDB, c.updateAllocStatus, alloc, c.vaultClient, c.consulService, watcher) + ar := NewAllocRunner(c.logger, c.configCopy.Copy(), c.stateDB, c.updateAllocStatus, alloc, c.vaultClient, c.consulService, watcher) c.configLock.RUnlock() c.allocLock.Lock() @@ -963,6 +963,9 @@ func (c *Client) reservePorts() { for _, net := range reservedIndex { node.Reserved.Networks = append(node.Reserved.Networks, net) } + + // Make the changes available to the config copy. + c.configCopy = c.config.Copy() } // updateNodeFromFingerprint updates the node with the result of @@ -1009,10 +1012,10 @@ func (c *Client) updateNodeFromFingerprint(response *cstructs.FingerprintRespons } if nodeHasChanged { - c.updateNode() + c.updateNodeLocked() } - return c.config.Node + return c.configCopy.Node } // updateNodeFromDriver receives either a fingerprint of the driver or its @@ -1104,10 +1107,10 @@ func (c *Client) updateNodeFromDriver(name string, fingerprint, health *structs. if hasChanged { c.config.Node.Drivers[name].UpdateTime = time.Now() - c.updateNode() + c.updateNodeLocked() } - return c.config.Node + return c.configCopy.Node } // resourcesAreEqual is a temporary function to compare whether resources are @@ -1752,9 +1755,14 @@ OUTER: } } -// updateNode triggers a client to update its node copy if it isn't doing +// updateNode updates the Node copy and triggers the client to send the updated +// Node to the server. This should be done while holding the configLock lock. // so already -func (c *Client) updateNode() { +func (c *Client) updateNodeLocked() { + // Update the config copy. + node := c.config.Node.Copy() + c.configCopy.Node = node + select { case c.triggerNodeUpdate <- struct{}{}: // Node update goroutine was released to execute @@ -1774,15 +1782,7 @@ func (c *Client) watchNodeUpdates() { select { case <-timer.C: c.logger.Printf("[DEBUG] client: state changed, updating node and re-registering.") - - // Update the config copy. - c.configLock.Lock() - node := c.config.Node.Copy() - c.configCopy.Node = node - c.configLock.Unlock() - c.retryRegisterNode() - hasChanged = false case <-c.triggerNodeUpdate: if hasChanged { @@ -1899,7 +1899,10 @@ func (c *Client) addAlloc(alloc *structs.Allocation, migrateToken string) error c.configLock.RLock() prevAlloc := newAllocWatcher(alloc, prevAR, c, c.configCopy, c.logger, migrateToken) - ar := NewAllocRunner(c.logger, c.configCopy, c.stateDB, c.updateAllocStatus, alloc, c.vaultClient, c.consulService, prevAlloc) + // Copy the config since the node can be swapped out as it is being updated. + // The long term fix is to pass in the config and node separately and then + // we don't have to do a copy. + ar := NewAllocRunner(c.logger, c.configCopy.Copy(), c.stateDB, c.updateAllocStatus, alloc, c.vaultClient, c.consulService, prevAlloc) c.configLock.RUnlock() // Store the alloc runner. diff --git a/client/fingerprint_manager.go b/client/fingerprint_manager.go index 499cc16d71f..cdd9e1472c1 100644 --- a/client/fingerprint_manager.go +++ b/client/fingerprint_manager.go @@ -53,10 +53,16 @@ func NewFingerprintManager(getConfig func() *config.Config, func (fm *FingerprintManager) setNode(node *structs.Node) { fm.nodeLock.Lock() defer fm.nodeLock.Unlock() - fm.node = node } +// getNode returns the current client node +func (fm *FingerprintManager) getNode() *structs.Node { + fm.nodeLock.Lock() + defer fm.nodeLock.Unlock() + return fm.node +} + // Run starts the process of fingerprinting the node. It does an initial pass, // identifying whitelisted and blacklisted fingerprints/drivers. Then, for // those which require periotic checking, it starts a periodic process for @@ -167,7 +173,7 @@ func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error { // supported func (fm *FingerprintManager) setupDrivers(drivers []string) error { var availDrivers []string - driverCtx := driver.NewDriverContext("", "", fm.getConfig(), fm.node, fm.logger, nil) + driverCtx := driver.NewDriverContext("", "", fm.getConfig(), fm.getNode(), fm.logger, nil) for _, name := range drivers { d, err := driver.NewDriver(name, driverCtx) diff --git a/nomad/structs/node.go b/nomad/structs/node.go index a4eb91e719c..76758fb8e98 100644 --- a/nomad/structs/node.go +++ b/nomad/structs/node.go @@ -2,6 +2,8 @@ package structs import ( "time" + + "github.com/hashicorp/nomad/helper" ) // DriverInfo is the current state of a single driver. This is updated @@ -14,6 +16,17 @@ type DriverInfo struct { UpdateTime time.Time } +func (di *DriverInfo) Copy() *DriverInfo { + if di == nil { + return nil + } + + cdi := new(DriverInfo) + *cdi = *di + cdi.Attributes = helper.CopyMapStringString(di.Attributes) + return cdi +} + // MergeHealthCheck merges information from a health check for a drier into a // node's driver info func (di *DriverInfo) MergeHealthCheck(other *DriverInfo) { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 1fd30e3221d..60d8f0d8019 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1461,6 +1461,7 @@ func (n *Node) Copy() *Node { nn.Meta = helper.CopyMapStringString(nn.Meta) nn.Events = copyNodeEvents(n.Events) nn.DrainStrategy = nn.DrainStrategy.Copy() + nn.Drivers = copyNodeDrivers(n.Drivers) return nn } @@ -1478,6 +1479,20 @@ func copyNodeEvents(events []*NodeEvent) []*NodeEvent { return c } +// copyNodeDrivers is a helper to copy a map of DriverInfo +func copyNodeDrivers(drivers map[string]*DriverInfo) map[string]*DriverInfo { + l := len(drivers) + if l == 0 { + return nil + } + + c := make(map[string]*DriverInfo, l) + for driver, info := range drivers { + c[driver] = info.Copy() + } + return c +} + // TerminalStatus returns if the current status is terminal and // will no longer transition. func (n *Node) TerminalStatus() bool {