From d727b72ce4480f226592aa8c61c55483c64aed1a Mon Sep 17 00:00:00 2001 From: Charlie Voiselle <464492+angrycub@users.noreply.github.com> Date: Wed, 7 Sep 2022 14:37:40 +0000 Subject: [PATCH 1/4] backport of commit eb6a261f7bf6373415a64c22cd554f84f3c24841 --- client/client.go | 5 +++++ nomad/node_endpoint.go | 14 ++++++++++---- nomad/structs/structs.go | 10 ++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/client/client.go b/client/client.go index 8383bcdcd1d..f7426649c34 100644 --- a/client/client.go +++ b/client/client.go @@ -1900,6 +1900,11 @@ func (c *Client) updateNodeStatus() error { "req_latency", end.Sub(start), "heartbeat_ttl", oldTTL, "since_last_heartbeat", time.Since(last)) } } + // Check heartbeat response for information about the server-side scheduling + // state of this node + c.UpdateConfig(func(c *config.Config) { + c.Node.SchedulingEligibility = resp.ClientStatus.SchedulingEligibility + }) // Update the number of nodes in the cluster so we can adjust our server // rebalance rate. diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index d5a1725b485..820ed5b82dc 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -199,7 +199,7 @@ func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUp n.srv.peerLock.RLock() defer n.srv.peerLock.RUnlock() - if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { + if err := n.constructNodeServerInfoResponse(args.Node.ID, snap, reply); err != nil { n.logger.Error("failed to populate NodeUpdateResponse", "error", err) return err } @@ -258,7 +258,7 @@ func equalDevices(n1, n2 *structs.Node) bool { } // updateNodeUpdateResponse assumes the n.srv.peerLock is held for reading. -func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error { +func (n *Node) constructNodeServerInfoResponse(nodeID string, snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error { reply.LeaderRPCAddr = string(n.srv.raft.Leader()) // Reply with config information required for future RPC requests @@ -273,6 +273,12 @@ func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply }) } + // Add ClientStatus information to heartbeat response. + node, _ := snap.NodeByID(nil, nodeID) + reply.ClientStatus = &structs.ClientStatus{ + SchedulingEligibility: node.SchedulingEligibility, + } + // TODO(sean@): Use an indexed node count instead // // Snapshot is used only to iterate over all nodes to create a node @@ -537,7 +543,7 @@ func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *struct reply.Index = index n.srv.peerLock.RLock() defer n.srv.peerLock.RUnlock() - if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { + if err := n.constructNodeServerInfoResponse(node.GetID(), snap, reply); err != nil { n.logger.Error("failed to populate NodeUpdateResponse", "error", err) return err } @@ -789,7 +795,7 @@ func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUp n.srv.peerLock.RLock() defer n.srv.peerLock.RUnlock() - if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { + if err := n.constructNodeServerInfoResponse(node.GetID(), snap, reply); err != nil { n.logger.Error("failed to populate NodeUpdateResponse", "error", err) return err } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 5712728b9ce..dbb33764ccb 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1339,9 +1339,19 @@ type NodeUpdateResponse struct { // region. Servers []*NodeServerInfo + // ClientStatus is used to inform clients what the server-side + // has for their scheduling status during heartbeats. + ClientStatus *ClientStatus + QueryMeta } +// ClientStatus is used to inform clients what the server-side +// has for their scheduling status during heartbeats. +type ClientStatus struct { + SchedulingEligibility string +} + // NodeDrainUpdateResponse is used to respond to a node drain update type NodeDrainUpdateResponse struct { NodeModifyIndex uint64 From 4fb077bbe55ad7db8d5db794fd61db2c8fe97cdc Mon Sep 17 00:00:00 2001 From: Charlie Voiselle <464492+angrycub@users.noreply.github.com> Date: Wed, 7 Sep 2022 14:50:03 +0000 Subject: [PATCH 2/4] backport of commit 44baeb12375f6bb8cffef5f3ed3a4b5a48818a66 --- client/client.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/client/client.go b/client/client.go index f7426649c34..65f453b10c3 100644 --- a/client/client.go +++ b/client/client.go @@ -1903,7 +1903,9 @@ func (c *Client) updateNodeStatus() error { // Check heartbeat response for information about the server-side scheduling // state of this node c.UpdateConfig(func(c *config.Config) { - c.Node.SchedulingEligibility = resp.ClientStatus.SchedulingEligibility + if resp.ClientStatus != nil { + c.Node.SchedulingEligibility = resp.ClientStatus.SchedulingEligibility + } }) // Update the number of nodes in the cluster so we can adjust our server From 1d7b45b10aa36fd75dca80b450c19e25e07d0641 Mon Sep 17 00:00:00 2001 From: Charlie Voiselle <464492+angrycub@users.noreply.github.com> Date: Thu, 8 Sep 2022 17:45:32 +0000 Subject: [PATCH 3/4] backport of commit 8493f3a0c5dd8b75c31266aeb590c9a5f5bf9060 --- client/client.go | 5 +++-- nomad/node_endpoint.go | 4 +--- nomad/structs/structs.go | 10 ++-------- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/client/client.go b/client/client.go index 65f453b10c3..c112542cb67 100644 --- a/client/client.go +++ b/client/client.go @@ -1900,11 +1900,12 @@ func (c *Client) updateNodeStatus() error { "req_latency", end.Sub(start), "heartbeat_ttl", oldTTL, "since_last_heartbeat", time.Since(last)) } } + // Check heartbeat response for information about the server-side scheduling // state of this node c.UpdateConfig(func(c *config.Config) { - if resp.ClientStatus != nil { - c.Node.SchedulingEligibility = resp.ClientStatus.SchedulingEligibility + if resp.SchedulingEligibility != "" { + c.Node.SchedulingEligibility = resp.SchedulingEligibility } }) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 820ed5b82dc..35049c3d375 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -275,9 +275,7 @@ func (n *Node) constructNodeServerInfoResponse(nodeID string, snap *state.StateS // Add ClientStatus information to heartbeat response. node, _ := snap.NodeByID(nil, nodeID) - reply.ClientStatus = &structs.ClientStatus{ - SchedulingEligibility: node.SchedulingEligibility, - } + reply.SchedulingEligibility = node.SchedulingEligibility // TODO(sean@): Use an indexed node count instead // diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index dbb33764ccb..58c71ec9055 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1339,19 +1339,13 @@ type NodeUpdateResponse struct { // region. Servers []*NodeServerInfo - // ClientStatus is used to inform clients what the server-side + // SchedulingEligibility is used to inform clients what the server-side // has for their scheduling status during heartbeats. - ClientStatus *ClientStatus + SchedulingEligibility string QueryMeta } -// ClientStatus is used to inform clients what the server-side -// has for their scheduling status during heartbeats. -type ClientStatus struct { - SchedulingEligibility string -} - // NodeDrainUpdateResponse is used to respond to a node drain update type NodeDrainUpdateResponse struct { NodeModifyIndex uint64 From d9fd04aaa162d5b61e4d7d8821a8029fe358ed1c Mon Sep 17 00:00:00 2001 From: Charlie Voiselle <464492+angrycub@users.noreply.github.com> Date: Thu, 8 Sep 2022 17:54:39 +0000 Subject: [PATCH 4/4] backport of commit 3e9a163827f6d2d1f31d23c466ce131ecad8581f --- .changelog/14483.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/14483.txt diff --git a/.changelog/14483.txt b/.changelog/14483.txt new file mode 100644 index 00000000000..07a3e141de1 --- /dev/null +++ b/.changelog/14483.txt @@ -0,0 +1,3 @@ +```release-note:bug +metrics: Update client `node_scheduling_eligibility` value with server heartbeats. +```