From 9929019b9afe6562f7521288bf0954e971975986 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 16 Apr 2018 13:28:23 -0700 Subject: [PATCH 1/7] Operate on copy --- client/client.go | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/client/client.go b/client/client.go index 46d8bd1d706..cfefef1f644 100644 --- a/client/client.go +++ b/client/client.go @@ -259,6 +259,12 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic return nil, fmt.Errorf("node setup failed: %v", err) } + // Store the config copy before restoring state but after it has been + // initialized. + c.configLock.Lock() + c.configCopy = c.config.Copy() + c.configLock.Unlock() + fingerprintManager := NewFingerprintManager(c.GetConfig, c.config.Node, c.shutdownCh, c.updateNodeFromFingerprint, c.updateNodeFromDriver, c.logger) @@ -1009,10 +1015,10 @@ func (c *Client) updateNodeFromFingerprint(response *cstructs.FingerprintRespons } if nodeHasChanged { - c.updateNode() + c.updateNodeLocked() } - return c.config.Node + return c.configCopy.Node } // updateNodeFromDriver receives either a fingerprint of the driver or its @@ -1104,10 +1110,10 @@ func (c *Client) updateNodeFromDriver(name string, fingerprint, health *structs. if hasChanged { c.config.Node.Drivers[name].UpdateTime = time.Now() - c.updateNode() + c.updateNodeLocked() } - return c.config.Node + return c.configCopy.Node } // resourcesAreEqual is a temporary function to compare whether resources are @@ -1752,9 +1758,14 @@ OUTER: } } -// updateNode triggers a client to update its node copy if it isn't doing +// updateNode updates the Node copy and triggers the client to send the updated +// Node to the server. This should be done while holding the configLock lock. // so already -func (c *Client) updateNode() { +func (c *Client) updateNodeLocked() { + // Update the config copy. + node := c.config.Node.Copy() + c.configCopy.Node = node + select { case c.triggerNodeUpdate <- struct{}{}: // Node update goroutine was released to execute @@ -1774,15 +1785,7 @@ func (c *Client) watchNodeUpdates() { select { case <-timer.C: c.logger.Printf("[DEBUG] client: state changed, updating node and re-registering.") - - // Update the config copy. - c.configLock.Lock() - node := c.config.Node.Copy() - c.configCopy.Node = node - c.configLock.Unlock() - c.retryRegisterNode() - hasChanged = false case <-c.triggerNodeUpdate: if hasChanged { From 89fa9a1e10c064ebc72fc76252e72d6ea2cb399c Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 16 Apr 2018 15:02:00 -0700 Subject: [PATCH 2/7] Fix copying drivers --- client/client.go | 4 ++-- nomad/structs/node.go | 13 +++++++++++++ nomad/structs/structs.go | 15 +++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/client/client.go b/client/client.go index cfefef1f644..2cb9dac5949 100644 --- a/client/client.go +++ b/client/client.go @@ -265,7 +265,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic c.configCopy = c.config.Copy() c.configLock.Unlock() - fingerprintManager := NewFingerprintManager(c.GetConfig, c.config.Node, + fingerprintManager := NewFingerprintManager(c.GetConfig, c.configCopy.Node, c.shutdownCh, c.updateNodeFromFingerprint, c.updateNodeFromDriver, c.logger) @@ -443,7 +443,7 @@ func (c *Client) Leave() error { func (c *Client) GetConfig() *config.Config { c.configLock.Lock() defer c.configLock.Unlock() - return c.config + return c.configCopy } // Datacenter returns the datacenter for the given client diff --git a/nomad/structs/node.go b/nomad/structs/node.go index a4eb91e719c..76758fb8e98 100644 --- a/nomad/structs/node.go +++ b/nomad/structs/node.go @@ -2,6 +2,8 @@ package structs import ( "time" + + "github.com/hashicorp/nomad/helper" ) // DriverInfo is the current state of a single driver. This is updated @@ -14,6 +16,17 @@ type DriverInfo struct { UpdateTime time.Time } +func (di *DriverInfo) Copy() *DriverInfo { + if di == nil { + return nil + } + + cdi := new(DriverInfo) + *cdi = *di + cdi.Attributes = helper.CopyMapStringString(di.Attributes) + return cdi +} + // MergeHealthCheck merges information from a health check for a drier into a // node's driver info func (di *DriverInfo) MergeHealthCheck(other *DriverInfo) { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 1fd30e3221d..60d8f0d8019 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1461,6 +1461,7 @@ func (n *Node) Copy() *Node { nn.Meta = helper.CopyMapStringString(nn.Meta) nn.Events = copyNodeEvents(n.Events) nn.DrainStrategy = nn.DrainStrategy.Copy() + nn.Drivers = copyNodeDrivers(n.Drivers) return nn } @@ -1478,6 +1479,20 @@ func copyNodeEvents(events []*NodeEvent) []*NodeEvent { return c } +// copyNodeDrivers is a helper to copy a map of DriverInfo +func copyNodeDrivers(drivers map[string]*DriverInfo) map[string]*DriverInfo { + l := len(drivers) + if l == 0 { + return nil + } + + c := make(map[string]*DriverInfo, l) + for driver, info := range drivers { + c[driver] = info.Copy() + } + return c +} + // TerminalStatus returns if the current status is terminal and // will no longer transition. func (n *Node) TerminalStatus() bool { From 741975657db3663da4af49645bcf7007f9adc921 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 16 Apr 2018 15:07:08 -0700 Subject: [PATCH 3/7] fix race node access --- client/fingerprint_manager.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/client/fingerprint_manager.go b/client/fingerprint_manager.go index 499cc16d71f..cdd9e1472c1 100644 --- a/client/fingerprint_manager.go +++ b/client/fingerprint_manager.go @@ -53,10 +53,16 @@ func NewFingerprintManager(getConfig func() *config.Config, func (fm *FingerprintManager) setNode(node *structs.Node) { fm.nodeLock.Lock() defer fm.nodeLock.Unlock() - fm.node = node } +// getNode returns the current client node +func (fm *FingerprintManager) getNode() *structs.Node { + fm.nodeLock.Lock() + defer fm.nodeLock.Unlock() + return fm.node +} + // Run starts the process of fingerprinting the node. It does an initial pass, // identifying whitelisted and blacklisted fingerprints/drivers. Then, for // those which require periotic checking, it starts a periodic process for @@ -167,7 +173,7 @@ func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error { // supported func (fm *FingerprintManager) setupDrivers(drivers []string) error { var availDrivers []string - driverCtx := driver.NewDriverContext("", "", fm.getConfig(), fm.node, fm.logger, nil) + driverCtx := driver.NewDriverContext("", "", fm.getConfig(), fm.getNode(), fm.logger, nil) for _, name := range drivers { d, err := driver.NewDriver(name, driverCtx) From 3cf87e0dc8ce19820545054c5cd33bc5a043cb69 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 16 Apr 2018 15:41:32 -0700 Subject: [PATCH 4/7] Copy the config given to the alloc runner --- client/client.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/client/client.go b/client/client.go index 2cb9dac5949..82e685f124e 100644 --- a/client/client.go +++ b/client/client.go @@ -732,7 +732,7 @@ func (c *Client) restoreState() error { watcher := noopPrevAlloc{} c.configLock.RLock() - ar := NewAllocRunner(c.logger, c.configCopy, c.stateDB, c.updateAllocStatus, alloc, c.vaultClient, c.consulService, watcher) + ar := NewAllocRunner(c.logger, c.configCopy.Copy(), c.stateDB, c.updateAllocStatus, alloc, c.vaultClient, c.consulService, watcher) c.configLock.RUnlock() c.allocLock.Lock() @@ -1902,7 +1902,10 @@ func (c *Client) addAlloc(alloc *structs.Allocation, migrateToken string) error c.configLock.RLock() prevAlloc := newAllocWatcher(alloc, prevAR, c, c.configCopy, c.logger, migrateToken) - ar := NewAllocRunner(c.logger, c.configCopy, c.stateDB, c.updateAllocStatus, alloc, c.vaultClient, c.consulService, prevAlloc) + // Copy the config since the node can be swapped out as it is being updated. + // The long term fix is to pass in the config and node separately and then + // we don't have to do a copy. + ar := NewAllocRunner(c.logger, c.configCopy.Copy(), c.stateDB, c.updateAllocStatus, alloc, c.vaultClient, c.consulService, prevAlloc) c.configLock.RUnlock() // Store the alloc runner. From cc3f264a35f4f1e8826d7de0d8917968cd34ed9b Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 16 Apr 2018 15:48:34 -0700 Subject: [PATCH 5/7] Cleanup --- client/client.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/client/client.go b/client/client.go index 82e685f124e..35adbc53436 100644 --- a/client/client.go +++ b/client/client.go @@ -277,12 +277,6 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic // Setup the reserved resources c.reservePorts() - // Store the config copy before restoring state but after it has been - // initialized. - c.configLock.Lock() - c.configCopy = c.config.Copy() - c.configLock.Unlock() - // Set the preconfigured list of static servers c.configLock.RLock() if len(c.configCopy.Servers) > 0 { @@ -969,6 +963,9 @@ func (c *Client) reservePorts() { for _, net := range reservedIndex { node.Reserved.Networks = append(node.Reserved.Networks, net) } + + // Make the changes available to the config copy. + c.configCopy = c.config.Copy() } // updateNodeFromFingerprint updates the node with the result of From 3ebecb676cbad9f0f7e1b7fe8156d04336ccb971 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Tue, 17 Apr 2018 11:53:08 -0400 Subject: [PATCH 6/7] fix up comments --- client/client.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/client.go b/client/client.go index 35adbc53436..a3e8dad6b6d 100644 --- a/client/client.go +++ b/client/client.go @@ -1756,8 +1756,8 @@ OUTER: } // updateNode updates the Node copy and triggers the client to send the updated -// Node to the server. This should be done while holding the configLock lock. -// so already +// Node to the server. This should be done while the caller holds the +// configLock lock. func (c *Client) updateNodeLocked() { // Update the config copy. node := c.config.Node.Copy() From 0596828eb5499b226241dc0f0063562d0547ba1b Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Tue, 17 Apr 2018 11:53:18 -0400 Subject: [PATCH 7/7] add test for node copy --- nomad/structs/structs_test.go | 77 +++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 8883e7a0b37..540441d6104 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -3676,3 +3676,80 @@ func TestNode_Canonicalize(t *testing.T) { node.Canonicalize() require.Equal(NodeSchedulingIneligible, node.SchedulingEligibility) } + +func TestNode_Copy(t *testing.T) { + t.Parallel() + require := require.New(t) + + node := &Node{ + ID: uuid.Generate(), + SecretID: uuid.Generate(), + Datacenter: "dc1", + Name: "foobar", + Attributes: map[string]string{ + "kernel.name": "linux", + "arch": "x86", + "nomad.version": "0.5.0", + "driver.exec": "1", + "driver.mock_driver": "1", + }, + Resources: &Resources{ + CPU: 4000, + MemoryMB: 8192, + DiskMB: 100 * 1024, + IOPS: 150, + Networks: []*NetworkResource{ + { + Device: "eth0", + CIDR: "192.168.0.100/32", + MBits: 1000, + }, + }, + }, + Reserved: &Resources{ + CPU: 100, + MemoryMB: 256, + DiskMB: 4 * 1024, + Networks: []*NetworkResource{ + { + Device: "eth0", + IP: "192.168.0.100", + ReservedPorts: []Port{{Label: "ssh", Value: 22}}, + MBits: 1, + }, + }, + }, + Links: map[string]string{ + "consul": "foobar.dc1", + }, + Meta: map[string]string{ + "pci-dss": "true", + "database": "mysql", + "version": "5.6", + }, + NodeClass: "linux-medium-pci", + Status: NodeStatusReady, + SchedulingEligibility: NodeSchedulingEligible, + Drivers: map[string]*DriverInfo{ + "mock_driver": &DriverInfo{ + Attributes: map[string]string{"running": "1"}, + Detected: true, + Healthy: true, + HealthDescription: "Currently active", + UpdateTime: time.Now(), + }, + }, + } + node.ComputeClass() + + node2 := node.Copy() + + require.Equal(node.Attributes, node2.Attributes) + require.Equal(node.Resources, node2.Resources) + require.Equal(node.Reserved, node2.Reserved) + require.Equal(node.Links, node2.Links) + require.Equal(node.Meta, node2.Meta) + require.Equal(node.Events, node2.Events) + require.Equal(node.DrainStrategy, node2.DrainStrategy) + require.Equal(node.Drivers, node2.Drivers) +}