diff --git a/.changelog/14483.txt b/.changelog/14483.txt new file mode 100644 index 00000000000..07a3e141de1 --- /dev/null +++ b/.changelog/14483.txt @@ -0,0 +1,3 @@ +```release-note:bug +metrics: Update client `node_scheduling_eligibility` value with server heartbeats. +``` diff --git a/client/client.go b/client/client.go index e9cbf50c57a..51dfefa308a 100644 --- a/client/client.go +++ b/client/client.go @@ -1870,6 +1870,14 @@ func (c *Client) updateNodeStatus() error { } } + // Check heartbeat response for information about the server-side scheduling + // state of this node + c.UpdateConfig(func(c *config.Config) { + if resp.SchedulingEligibility != "" { + c.Node.SchedulingEligibility = resp.SchedulingEligibility + } + }) + // Update the number of nodes in the cluster so we can adjust our server // rebalance rate. c.servers.SetNumNodes(resp.NumNodes) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index d5a1725b485..35049c3d375 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -199,7 +199,7 @@ func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUp n.srv.peerLock.RLock() defer n.srv.peerLock.RUnlock() - if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { + if err := n.constructNodeServerInfoResponse(args.Node.ID, snap, reply); err != nil { n.logger.Error("failed to populate NodeUpdateResponse", "error", err) return err } @@ -258,7 +258,7 @@ func equalDevices(n1, n2 *structs.Node) bool { } // updateNodeUpdateResponse assumes the n.srv.peerLock is held for reading. 
-func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error { +func (n *Node) constructNodeServerInfoResponse(nodeID string, snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error { reply.LeaderRPCAddr = string(n.srv.raft.Leader()) // Reply with config information required for future RPC requests @@ -273,6 +273,15 @@ func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply }) } + // Add the node's server-side scheduling eligibility to the heartbeat + // response. If the node is not in the snapshot the field stays empty, + // which the client ignores when handling the response. + if node, err := snap.NodeByID(nil, nodeID); err != nil { + return err + } else if node != nil { + reply.SchedulingEligibility = node.SchedulingEligibility + } + // TODO(sean@): Use an indexed node count instead // // Snapshot is used only to iterate over all nodes to create a node @@ -537,7 +546,7 @@ func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *struct reply.Index = index n.srv.peerLock.RLock() defer n.srv.peerLock.RUnlock() - if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { + if err := n.constructNodeServerInfoResponse(node.GetID(), snap, reply); err != nil { n.logger.Error("failed to populate NodeUpdateResponse", "error", err) return err } @@ -789,7 +798,7 @@ func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUp n.srv.peerLock.RLock() defer n.srv.peerLock.RUnlock() - if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { + if err := n.constructNodeServerInfoResponse(node.GetID(), snap, reply); err != nil { n.logger.Error("failed to populate NodeUpdateResponse", "error", err) return err } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index c137d74d85b..c7555c41a59 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1295,6 +1295,10 @@ type NodeUpdateResponse struct { // region. Servers []*NodeServerInfo + // SchedulingEligibility is used to inform clients what the server-side + // has for their scheduling status during heartbeats. + SchedulingEligibility string + QueryMeta }