From 0406d4b5ec13b6b601eaa7384c7db0de9de96e64 Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Tue, 23 Feb 2021 20:35:52 +0000 Subject: [PATCH 01/19] wip: rebase --- api/nodes.go | 15 ++++++ api/nodes_test.go | 16 ++++++ nomad/fsm.go | 17 +++++- nomad/node_endpoint_test.go | 15 +++++- nomad/state/events_test.go | 2 +- nomad/state/state_store.go | 53 ++++++++++++++++--- nomad/state/state_store_test.go | 19 +++++-- nomad/structs/structs.go | 48 ++++++++++++++++- .../github.com/hashicorp/nomad/api/nodes.go | 11 ++++ 9 files changed, 181 insertions(+), 15 deletions(-) diff --git a/api/nodes.go b/api/nodes.go index 4c9b8c70058..314a782d914 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -18,6 +18,10 @@ const ( // status being ready. NodeSchedulingEligible = "eligible" NodeSchedulingIneligible = "ineligible" + + DrainStatusDraining = "draining" + DrainStatusCompleted = "complete" + DrainStatusCancelled = "cancelled" ) // Nodes is used to query node-related API endpoints @@ -468,6 +472,15 @@ type HostVolumeInfo struct { ReadOnly bool } +// DrainMetadata contains information about the most recent drain operation for a given Node. +type DrainMetadata struct { + StartedAt time.Time + UpdatedAt time.Time + Status string + AccessorID string + Meta map[string]string +} + // Node is used to deserialize a node entry. type Node struct { ID string @@ -494,6 +507,7 @@ type Node struct { HostVolumes map[string]*HostVolumeInfo CSIControllerPlugins map[string]*CSIInfo CSINodePlugins map[string]*CSIInfo + LastDrain *DrainMetadata CreateIndex uint64 ModifyIndex uint64 } @@ -789,6 +803,7 @@ type NodeListStub struct { Drivers map[string]*DriverInfo NodeResources *NodeResources `json:",omitempty"` ReservedResources *NodeReservedResources `json:",omitempty"` + LastDrain *DrainMetadata CreateIndex uint64 ModifyIndex uint64 } diff --git a/api/nodes_test.go b/api/nodes_test.go index 7ed2b956604..2fd9a77f356 100644 --- a/api/nodes_test.go +++ b/api/nodes_test.go @@ -241,8 +241,10 @@ func TestNodes_ToggleDrain(t *testing.T) { out, _, err := nodes.Info(nodeID, nil) require.Nil(err) require.False(out.Drain) + require.Nil(out.LastDrain) // Toggle it on + timeBeforeDrain := time.Now().Add(-1 * time.Second) spec := &DrainSpec{ Deadline: 10 * time.Second, } @@ -260,6 +262,8 @@ func TestNodes_ToggleDrain(t *testing.T) { require.NoError(err) // we expect to see the node change to Drain:true and then back to Drain:false+ineligible + // sleep because of time round-off in LastDrain + time.Sleep(1 * time.Second) var sawDraining, sawDrainComplete uint64 for sawDrainComplete == 0 { select { @@ -271,9 +275,21 @@ func TestNodes_ToggleDrain(t *testing.T) { require.Equal(node.DrainStrategy != nil, node.Drain) require.True(!node.Drain || node.SchedulingEligibility == NodeSchedulingIneligible) // node.Drain => "ineligible" if node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible { + require.NotNil(node.LastDrain) + require.Equal(DrainStatusDraining, node.LastDrain.Status) + now := time.Now() + require.True(!node.LastDrain.StartedAt.Before(timeBeforeDrain) && !node.LastDrain.StartedAt.After(now), + "wanted %v <= %v <= %v", timeBeforeDrain, node.LastDrain.StartedAt, now) + // TODO: test meta vvvv + // require.Equal(node.LastDrain.Meta["reason"]) sawDraining = node.ModifyIndex } else if sawDraining != 0 && node.ModifyIndex > sawDraining && !node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible { + require.NotNil(node.LastDrain) + require.Equal(DrainStatusCompleted, node.LastDrain.Status) + require.True(!node.LastDrain.UpdatedAt.Before(node.LastDrain.StartedAt)) + // TODO: test meta vvvv + // require.Equal(node.LastDrain.Meta["reason"]) sawDrainComplete = node.ModifyIndex } } diff --git a/nomad/fsm.go b/nomad/fsm.go index e3f6474cf2d..ed83b976879 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -429,7 +429,22 @@ func (n *nomadFSM) applyDrainUpdate(reqType structs.MessageType, buf []byte, ind panic(fmt.Errorf("failed to decode request: %v", err)) } - if err := n.state.UpdateNodeDrain(reqType, index, req.NodeID, req.DrainStrategy, req.MarkEligible, req.UpdatedAt, req.NodeEvent); err != nil { + accessorId := "" + if req.AuthToken != "" { + token, err := n.state.ACLTokenBySecretID(nil, req.AuthToken) + if err != nil { + n.logger.Error("error looking up ACL token from drain update", "error", err) + return fmt.Errorf("error looking up ACL token: %v", err) + } + if token == nil { + n.logger.Error("token did not exist during node drain update") + return fmt.Errorf("token did not exist during node drain update") + } + accessorId = token.AccessorID + } + + if err := n.state.UpdateNodeDrain(reqType, index, req.NodeID, req.DrainStrategy, req.MarkEligible, req.UpdatedAt, + req.NodeEvent, req.Meta, accessorId); err != nil { n.logger.Error("UpdateNodeDrain failed", "error", err) return err } diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index c26945832c7..95c29744245 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -903,7 +903,10 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { dereg := &structs.NodeUpdateDrainRequest{ NodeID: node.ID, DrainStrategy: strategy, - WriteRequest: structs.WriteRequest{Region: "global"}, + Meta: map[string]string{ + "message": "this node looks funny", + }, + WriteRequest: structs.WriteRequest{Region: "global"}, } var resp2 structs.NodeDrainUpdateResponse require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp2)) @@ -918,6 +921,11 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { require.Equal(strategy.Deadline, out.DrainStrategy.Deadline) require.Len(out.Events, 2) require.Equal(NodeDrainEventDrainSet, out.Events[1].Message) + require.NotNil(out.LastDrain) + require.Equal(structs.DrainStatusDraining, out.LastDrain.Status) + require.True(out.LastDrain.StartedAt.Before(time.Now())) + require.Equal(out.LastDrain.StartedAt, out.LastDrain.UpdatedAt) + require.Equal("this node looks funny", out.LastDrain.Meta["message"]) // before+deadline should be before the forced deadline require.True(beforeUpdate.Add(strategy.Deadline).Before(out.DrainStrategy.ForceDeadline)) @@ -1006,6 +1014,9 @@ func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { { var resp structs.NodeDrainUpdateResponse require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp), "RPC") + out, err := state.NodeByID(nil, node.ID) + require.NoError(err) + require.Equal(validToken.AccessorID, out.LastDrain.AccessorID) } // Try with a invalid token @@ -2858,7 +2869,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { Deadline: 10 * time.Second, }, } - errCh <- state.UpdateNodeDrain(structs.MsgTypeTestSetup, 3, node.ID, s, false, 0, nil) + errCh <- state.UpdateNodeDrain(structs.MsgTypeTestSetup, 3, node.ID, s, false, 0, nil, nil, "") }) req.MinQueryIndex = 2 diff --git a/nomad/state/events_test.go b/nomad/state/events_test.go index 1ae6edf6231..078ba43ced4 100644 --- a/nomad/state/events_test.go +++ b/nomad/state/events_test.go @@ -935,7 +935,7 @@ func TestNodeDrainEventFromChanges(t *testing.T) { updatedAt := time.Now() event := &structs.NodeEvent{} - require.NoError(t, s.updateNodeDrainImpl(tx, 100, node.ID, strat, markEligible, updatedAt.UnixNano(), event)) + require.NoError(t, s.updateNodeDrainImpl(tx, 100, node.ID, strat, markEligible, updatedAt.UnixNano(), event, nil, "", false)) changes := Changes{Changes: tx.Changes(), Index: 100, MsgType: structs.NodeUpdateDrainRequestType} got := eventsFromChanges(tx, changes) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 6d1f0c4be2c..6d93f1b5313 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -834,6 +834,7 @@ func upsertNodeTxn(txn *txn, index uint64, node *structs.Node) error { node.SchedulingEligibility = exist.SchedulingEligibility // Retain the eligibility node.DrainStrategy = exist.DrainStrategy // Retain the drain strategy + node.LastDrain = exist.LastDrain // Retain the drain metadata } else { // Because this is the first time the node is being registered, we should // also create a node registration event @@ -951,12 +952,14 @@ func (s *StateStore) updateNodeStatusTxn(txn *txn, nodeID, status string, update } // BatchUpdateNodeDrain is used to update the drain of a node set of nodes. -// This is only called when node drain is completed by the drainer. -func (s *StateStore) BatchUpdateNodeDrain(msgType structs.MessageType, index uint64, updatedAt int64, updates map[string]*structs.DrainUpdate, events map[string]*structs.NodeEvent) error { +// This is currently only called when node drain is completed by the drainer. +func (s *StateStore) BatchUpdateNodeDrain(msgType structs.MessageType, index uint64, updatedAt int64, + updates map[string]*structs.DrainUpdate, events map[string]*structs.NodeEvent) error { txn := s.db.WriteTxnMsgT(msgType, index) defer txn.Abort() for node, update := range updates { - if err := s.updateNodeDrainImpl(txn, index, node, update.DrainStrategy, update.MarkEligible, updatedAt, events[node]); err != nil { + if err := s.updateNodeDrainImpl(txn, index, node, update.DrainStrategy, update.MarkEligible, updatedAt, + events[node], nil, "", true); err != nil { return err } } @@ -964,11 +967,14 @@ func (s *StateStore) BatchUpdateNodeDrain(msgType structs.MessageType, index uin } // UpdateNodeDrain is used to update the drain of a node -func (s *StateStore) UpdateNodeDrain(msgType structs.MessageType, index uint64, nodeID string, drain *structs.DrainStrategy, markEligible bool, updatedAt int64, event *structs.NodeEvent) error { +func (s *StateStore) UpdateNodeDrain(msgType structs.MessageType, index uint64, nodeID string, + drain *structs.DrainStrategy, markEligible bool, updatedAt int64, + event *structs.NodeEvent, drainMeta map[string]string, accessorId string) error { txn := s.db.WriteTxnMsgT(msgType, index) defer txn.Abort() - if err := s.updateNodeDrainImpl(txn, index, nodeID, drain, markEligible, updatedAt, event); err != nil { + if err := s.updateNodeDrainImpl(txn, index, nodeID, drain, markEligible, updatedAt, event, + drainMeta, accessorId, false); err != nil { return err } @@ -976,7 +982,9 @@ func (s *StateStore) UpdateNodeDrain(msgType structs.MessageType, index uint64, } func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, - drain *structs.DrainStrategy, markEligible bool, updatedAt int64, event *structs.NodeEvent) error { + drain *structs.DrainStrategy, markEligible bool, updatedAt int64, + event *structs.NodeEvent, drainMeta map[string]string, accessorId string, + drainCompleted bool) error { // Lookup the node existing, err := txn.First("nodes", "id", nodeID) @@ -1005,6 +1013,39 @@ func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, copyNode.SchedulingEligibility = structs.NodeSchedulingEligible } + // Update LastDrain + updateTime := time.Unix(updatedAt, 0) + // if starting a new drain operation, create a new LastDrain. otherwise, update the existing one. + // if LastDrain doesn't exist, we'll need to create a new one. this might happen if we upgrade the + // server to 1.0.4 during a drain operation + if existingNode.DrainStrategy == nil && drain != nil || existingNode.LastDrain == nil { + // starting a new drain operation + copyNode.LastDrain = &structs.DrainMetadata{ + StartedAt: updateTime, + UpdatedAt: updateTime, + Status: structs.DrainStatusDraining, + AccessorID: accessorId, + Meta: drainMeta, + } + } else { + copyNode.LastDrain.UpdatedAt = updateTime + if accessorId != "" { + // we won't have an accessor ID for drain complete; don't overwrite the existing one + copyNode.LastDrain.AccessorID = accessorId + } + if drainMeta != nil { + // similarly, won't have metadata for drain complete; keep existing + copyNode.Meta = drainMeta + } + if drain == nil { + if drainCompleted { + copyNode.LastDrain.Status = structs.DrainStatusCompleted + } else { + copyNode.LastDrain.Status = structs.DrainStatusCancelled + } + } + } + copyNode.ModifyIndex = index // Insert the node diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index f645a278824..3d9c217f82f 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -965,6 +965,9 @@ func TestStateStore_BatchUpdateNodeDrain(t *testing.T) { require.Nil(err) require.NotNil(out.DrainStrategy) require.Equal(out.DrainStrategy, expectedDrain) + // TODO: cgbaker: more tests for LastDrain + require.NotNil(out.LastDrain) + require.Equal(structs.DrainStatusDraining, out.LastDrain.Status) require.Len(out.Events, 2) require.EqualValues(1002, out.ModifyIndex) require.EqualValues(7, out.StatusUpdatedAt) @@ -1001,13 +1004,16 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { Subsystem: structs.NodeEventSubsystemDrain, Timestamp: time.Now(), } - require.Nil(state.UpdateNodeDrain(structs.MsgTypeTestSetup, 1001, node.ID, expectedDrain, false, 7, event)) + require.Nil(state.UpdateNodeDrain(structs.MsgTypeTestSetup, 1001, node.ID, expectedDrain, false, 7, event, nil, "")) require.True(watchFired(ws)) ws = memdb.NewWatchSet() out, err := state.NodeByID(ws, node.ID) require.Nil(err) require.NotNil(out.DrainStrategy) + // TODO: cgbaker: more tests for LastDrain + require.NotNil(out.LastDrain) + require.Equal(structs.DrainStatusDraining, out.LastDrain.Status) require.Equal(out.DrainStrategy, expectedDrain) require.Len(out.Events, 2) require.EqualValues(1001, out.ModifyIndex) @@ -1136,7 +1142,7 @@ func TestStateStore_UpdateNodeDrain_ResetEligiblity(t *testing.T) { Subsystem: structs.NodeEventSubsystemDrain, Timestamp: time.Now(), } - require.Nil(state.UpdateNodeDrain(structs.MsgTypeTestSetup, 1001, node.ID, drain, false, 7, event1)) + require.Nil(state.UpdateNodeDrain(structs.MsgTypeTestSetup, 1001, node.ID, drain, false, 7, event1, nil, "")) require.True(watchFired(ws)) // Remove the drain @@ -1145,13 +1151,18 @@ func TestStateStore_UpdateNodeDrain_ResetEligiblity(t *testing.T) { Subsystem: structs.NodeEventSubsystemDrain, Timestamp: time.Now(), } - require.Nil(state.UpdateNodeDrain(structs.MsgTypeTestSetup, 1002, node.ID, nil, true, 9, event2)) + require.Nil(state.UpdateNodeDrain(structs.MsgTypeTestSetup, 1002, node.ID, nil, true, 9, event2, nil, "")) ws = memdb.NewWatchSet() out, err := state.NodeByID(ws, node.ID) require.Nil(err) require.Nil(out.DrainStrategy) require.Equal(out.SchedulingEligibility, structs.NodeSchedulingEligible) + // TODO: cgbaker: more tests for LastDrain + require.NotNil(out.LastDrain) + require.Equal(structs.DrainStatusCancelled, out.LastDrain.Status) + require.Equal(time.Unix(7, 0), out.LastDrain.StartedAt) + require.Equal(time.Unix(9, 0), out.LastDrain.UpdatedAt) require.Len(out.Events, 3) require.EqualValues(1002, out.ModifyIndex) require.EqualValues(9, out.StatusUpdatedAt) @@ -1210,7 +1221,7 @@ func TestStateStore_UpdateNodeEligibility(t *testing.T) { Deadline: -1 * time.Second, }, } - require.Nil(state.UpdateNodeDrain(structs.MsgTypeTestSetup, 1002, node.ID, expectedDrain, false, 7, nil)) + require.Nil(state.UpdateNodeDrain(structs.MsgTypeTestSetup, 1002, node.ID, expectedDrain, false, 7, nil, nil, "")) // Try to set the node to eligible err = state.UpdateNodeEligibility(structs.MsgTypeTestSetup, 1003, node.ID, structs.NodeSchedulingEligible, 9, nil) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 4cf0afc2ed5..9678af53e01 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -508,6 +508,9 @@ type NodeUpdateDrainRequest struct { // UpdatedAt represents server time of receiving request UpdatedAt int64 + // Meta is user-provided metadata relating to the drain operation + Meta map[string]string + WriteRequest } @@ -1763,6 +1766,43 @@ func (d *DrainStrategy) Equal(o *DrainStrategy) bool { return true } +const ( + // DrainStatuses are the various states a drain can be in, as reflect in DrainMetadata + DrainStatusDraining = "draining" + DrainStatusCompleted = "complete" + DrainStatusCancelled = "cancelled" +) + +// DrainMetadata contains information about the most recent drain operation for a given Node. +type DrainMetadata struct { + // StartedAt is the time that the drain operation started. This is equal to Node.DrainStrategy.StartedAt, + // if it exists + StartedAt time.Time + + // UpdatedAt is the time that that this struct was most recently updated, either via API action + // or drain completion + UpdatedAt time.Time + + // Status reflects the status of the drain operation: "draining", "completed", "cancelled" + Status string + + // AccessorID is the accessor ID of the ACL token used in the most recent API operation against this drain + AccessorID string + + // Meta includes the operator-submitted metadata about this drain operation + Meta map[string]string +} + +func (m *DrainMetadata) Copy() *DrainMetadata { + if m == nil { + return nil + } + c := new(DrainMetadata) + *c = *m + c.Meta = helper.CopyMapStringString(m.Meta) + return c +} + // Node is a representation of a schedulable client node type Node struct { // ID is a unique identifier for the node. It can be constructed @@ -1773,7 +1813,7 @@ type Node struct { // SecretID is an ID that is only known by the Node and the set of Servers. // It is not accessible via the API and is used to authenticate nodes // conducting privileged activities. - SecretID string `json:"-"` + SecretID string // Datacenter for this node Datacenter string @@ -1864,6 +1904,9 @@ type Node struct { // HostVolumes is a map of host volume names to their configuration HostVolumes map[string]*ClientHostVolumeConfig + // LastDrain contains metadata about the most recent drain operation + LastDrain *DrainMetadata + // Raft Indexes CreateIndex uint64 ModifyIndex uint64 @@ -1941,6 +1984,7 @@ func (n *Node) Copy() *Node { nn.Meta = helper.CopyMapStringString(nn.Meta) nn.Events = copyNodeEvents(n.Events) nn.DrainStrategy = nn.DrainStrategy.Copy() + nn.LastDrain = nn.LastDrain.Copy() nn.CSIControllerPlugins = copyNodeCSI(nn.CSIControllerPlugins) nn.CSINodePlugins = copyNodeCSI(nn.CSINodePlugins) nn.Drivers = copyNodeDrivers(n.Drivers) @@ -2093,6 +2137,7 @@ func (n *Node) Stub(fields *NodeStubFields) *NodeListStub { StatusDescription: n.StatusDescription, Drivers: n.Drivers, HostVolumes: n.HostVolumes, + LastDrain: n.LastDrain, CreateIndex: n.CreateIndex, ModifyIndex: n.ModifyIndex, } @@ -2124,6 +2169,7 @@ type NodeListStub struct { HostVolumes map[string]*ClientHostVolumeConfig NodeResources *NodeResources `json:",omitempty"` ReservedResources *NodeReservedResources `json:",omitempty"` + LastDrain *DrainMetadata CreateIndex uint64 ModifyIndex uint64 } diff --git a/vendor/github.com/hashicorp/nomad/api/nodes.go b/vendor/github.com/hashicorp/nomad/api/nodes.go index 4c9b8c70058..db8c04788ec 100644 --- a/vendor/github.com/hashicorp/nomad/api/nodes.go +++ b/vendor/github.com/hashicorp/nomad/api/nodes.go @@ -468,6 +468,15 @@ type HostVolumeInfo struct { ReadOnly bool } +// DrainMetadata contains information about the most recent drain operation for a given Node. +type DrainMetadata struct { + StartedAt time.Time + UpdatedAt time.Time + Status string + AccessorID string + Meta map[string]string +} + // Node is used to deserialize a node entry. type Node struct { ID string @@ -494,6 +503,7 @@ type Node struct { HostVolumes map[string]*HostVolumeInfo CSIControllerPlugins map[string]*CSIInfo CSINodePlugins map[string]*CSIInfo + LastDrain *DrainMetadata CreateIndex uint64 ModifyIndex uint64 } @@ -789,6 +799,7 @@ type NodeListStub struct { Drivers map[string]*DriverInfo NodeResources *NodeResources `json:",omitempty"` ReservedResources *NodeReservedResources `json:",omitempty"` + LastDrain *DrainMetadata CreateIndex uint64 ModifyIndex uint64 } From 38fbdd3842a6d8b92391bc411806b395b57bc45b Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Sun, 28 Mar 2021 12:09:05 +0000 Subject: [PATCH 02/19] node drain metadata wip --- api/nodes.go | 38 ++++++++++++++++- api/nodes_test.go | 15 ++++--- command/agent/node_endpoint.go | 1 + nomad/state/state_store.go | 25 +++++++---- .../github.com/hashicorp/nomad/api/nodes.go | 42 ++++++++++++++++++- 5 files changed, 105 insertions(+), 16 deletions(-) diff --git a/api/nodes.go b/api/nodes.go index 314a782d914..14f58c776ea 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -71,6 +71,9 @@ type NodeUpdateDrainRequest struct { // MarkEligible marks the node as eligible for scheduling if removing // the drain strategy. MarkEligible bool + + // Meta allows operators to specify metadata related to the drain operation + Meta map[string]string } // NodeDrainUpdateResponse is used to respond to a node drain update @@ -81,14 +84,45 @@ type NodeDrainUpdateResponse struct { WriteMeta } +// DrainOptions is used to pass through node drain parameters +type DrainOptions struct { + // DrainSpec contains the drain specification for the node. If non-nil, + // the node will be marked ineligible and begin/continue draining according + // to the provided drain spec. + // If nil, any existing drain operation will be canceled. + DrainSpec *DrainSpec + + // MarkEligible indicates whether the node should be marked as eligible when + // canceling a drain operation. + MarkEligible bool + + // Meta is metadata that is persisted in Node.LastDrain about this + // drain update. + Meta map[string]string +} + // UpdateDrain is used to update the drain strategy for a given node. If // markEligible is true and the drain is being removed, the node will be marked // as having its scheduling being eligible func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q *WriteOptions) (*NodeDrainUpdateResponse, error) { - req := &NodeUpdateDrainRequest{ - NodeID: nodeID, + resp, err := n.UpdateDrainOpts(nodeID, DrainOptions{ DrainSpec: spec, MarkEligible: markEligible, + Meta: nil, + }, q) + return resp, err +} + +// UpdateDrainWithMeta is used to update the drain strategy for a given node. If +// markEligible is true and the drain is being removed, the node will be marked +// as having its scheduling being eligible +func (n *Nodes) UpdateDrainOpts(nodeID string, opts DrainOptions, q *WriteOptions) (*NodeDrainUpdateResponse, + error) { + req := &NodeUpdateDrainRequest{ + NodeID: nodeID, + DrainSpec: opts.DrainSpec, + MarkEligible: opts.MarkEligible, + Meta: opts.Meta, } var resp NodeDrainUpdateResponse diff --git a/api/nodes_test.go b/api/nodes_test.go index 2fd9a77f356..ec0bd513714 100644 --- a/api/nodes_test.go +++ b/api/nodes_test.go @@ -248,7 +248,14 @@ func TestNodes_ToggleDrain(t *testing.T) { spec := &DrainSpec{ Deadline: 10 * time.Second, } - drainOut, err := nodes.UpdateDrain(nodeID, spec, false, nil) + drainMeta := map[string]string{ + "reason": "this node needs to go", + } + drainOut, err := nodes.UpdateDrainOpts(nodeID, DrainOptions{ + DrainSpec: spec, + MarkEligible: false, + Meta: drainMeta, + }, nil) require.Nil(err) assertWriteMeta(t, &drainOut.WriteMeta) @@ -280,16 +287,14 @@ func TestNodes_ToggleDrain(t *testing.T) { now := time.Now() require.True(!node.LastDrain.StartedAt.Before(timeBeforeDrain) && !node.LastDrain.StartedAt.After(now), "wanted %v <= %v <= %v", timeBeforeDrain, node.LastDrain.StartedAt, now) - // TODO: test meta vvvv - // require.Equal(node.LastDrain.Meta["reason"]) + require.Equal(drainMeta, node.LastDrain.Meta) sawDraining = node.ModifyIndex } else if sawDraining != 0 && node.ModifyIndex > sawDraining && !node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible { require.NotNil(node.LastDrain) require.Equal(DrainStatusCompleted, node.LastDrain.Status) require.True(!node.LastDrain.UpdatedAt.Before(node.LastDrain.StartedAt)) - // TODO: test meta vvvv - // require.Equal(node.LastDrain.Meta["reason"]) + require.Equal(drainMeta, node.LastDrain.Meta) sawDrainComplete = node.ModifyIndex } } diff --git a/command/agent/node_endpoint.go b/command/agent/node_endpoint.go index c5ff81d8156..90175f4fbcb 100644 --- a/command/agent/node_endpoint.go +++ b/command/agent/node_endpoint.go @@ -125,6 +125,7 @@ func (s *HTTPServer) nodeToggleDrain(resp http.ResponseWriter, req *http.Request args := structs.NodeUpdateDrainRequest{ NodeID: nodeID, MarkEligible: drainRequest.MarkEligible, + Meta: drainRequest.Meta, } if drainRequest.DrainSpec != nil { args.DrainStrategy = &structs.DrainStrategy{ diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 6d93f1b5313..0480fc7300a 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -1015,18 +1015,29 @@ func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, // Update LastDrain updateTime := time.Unix(updatedAt, 0) + // when done with this method, copyNode.LastDrain should be set + // this is either a new LastDrain struct or an update of the existing one + // // if starting a new drain operation, create a new LastDrain. otherwise, update the existing one. - // if LastDrain doesn't exist, we'll need to create a new one. this might happen if we upgrade the - // server to 1.0.4 during a drain operation - if existingNode.DrainStrategy == nil && drain != nil || existingNode.LastDrain == nil { - // starting a new drain operation + // if already draining and LastDrain doesn't exist, we'll need to create a new one. + // this might happen if we upgrade/transition to 1.1 during a drain operation + if existingNode.DrainStrategy == nil && copyNode.DrainStrategy != nil || + copyNode.LastDrain == nil { + copyNode.LastDrain = &structs.DrainMetadata{ StartedAt: updateTime, UpdatedAt: updateTime, - Status: structs.DrainStatusDraining, AccessorID: accessorId, Meta: drainMeta, } + switch { + case drain != nil: + copyNode.LastDrain.Status = structs.DrainStatusDraining + case drainCompleted: + copyNode.LastDrain.Status = structs.DrainStatusCompleted + default: + copyNode.LastDrain.Status = structs.DrainStatusCancelled + } } else { copyNode.LastDrain.UpdatedAt = updateTime if accessorId != "" { @@ -1034,8 +1045,8 @@ func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, copyNode.LastDrain.AccessorID = accessorId } if drainMeta != nil { - // similarly, won't have metadata for drain complete; keep existing - copyNode.Meta = drainMeta + // similarly, won't have metadata for drain complete; keep the existing operator-provided metadata + copyNode.LastDrain.Meta = drainMeta } if drain == nil { if drainCompleted { diff --git a/vendor/github.com/hashicorp/nomad/api/nodes.go b/vendor/github.com/hashicorp/nomad/api/nodes.go index db8c04788ec..14f58c776ea 100644 --- a/vendor/github.com/hashicorp/nomad/api/nodes.go +++ b/vendor/github.com/hashicorp/nomad/api/nodes.go @@ -18,6 +18,10 @@ const ( // status being ready. NodeSchedulingEligible = "eligible" NodeSchedulingIneligible = "ineligible" + + DrainStatusDraining = "draining" + DrainStatusCompleted = "complete" + DrainStatusCancelled = "cancelled" ) // Nodes is used to query node-related API endpoints @@ -67,6 +71,9 @@ type NodeUpdateDrainRequest struct { // MarkEligible marks the node as eligible for scheduling if removing // the drain strategy. MarkEligible bool + + // Meta allows operators to specify metadata related to the drain operation + Meta map[string]string } // NodeDrainUpdateResponse is used to respond to a node drain update @@ -77,14 +84,45 @@ type NodeDrainUpdateResponse struct { WriteMeta } +// DrainOptions is used to pass through node drain parameters +type DrainOptions struct { + // DrainSpec contains the drain specification for the node. If non-nil, + // the node will be marked ineligible and begin/continue draining according + // to the provided drain spec. + // If nil, any existing drain operation will be canceled. + DrainSpec *DrainSpec + + // MarkEligible indicates whether the node should be marked as eligible when + // canceling a drain operation. + MarkEligible bool + + // Meta is metadata that is persisted in Node.LastDrain about this + // drain update. + Meta map[string]string +} + // UpdateDrain is used to update the drain strategy for a given node. If // markEligible is true and the drain is being removed, the node will be marked // as having its scheduling being eligible func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q *WriteOptions) (*NodeDrainUpdateResponse, error) { - req := &NodeUpdateDrainRequest{ - NodeID: nodeID, + resp, err := n.UpdateDrainOpts(nodeID, DrainOptions{ DrainSpec: spec, MarkEligible: markEligible, + Meta: nil, + }, q) + return resp, err +} + +// UpdateDrainWithMeta is used to update the drain strategy for a given node. If +// markEligible is true and the drain is being removed, the node will be marked +// as having its scheduling being eligible +func (n *Nodes) UpdateDrainOpts(nodeID string, opts DrainOptions, q *WriteOptions) (*NodeDrainUpdateResponse, + error) { + req := &NodeUpdateDrainRequest{ + NodeID: nodeID, + DrainSpec: opts.DrainSpec, + MarkEligible: opts.MarkEligible, + Meta: opts.Meta, } var resp NodeDrainUpdateResponse From ed88a0dafe74b7e20c63299c90364e2a1f4080e5 Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Sun, 28 Mar 2021 14:53:47 +0000 Subject: [PATCH 03/19] wip: more work on node drain metadata, needs testing --- api/event_stream_test.go | 2 +- command/agent/node_endpoint_test.go | 14 +++++++++ nomad/drainer/watch_nodes_test.go | 4 +-- nomad/node_endpoint.go | 3 ++ nomad/state/state_store.go | 44 ++++++++++++++--------------- 5 files changed, 42 insertions(+), 25 deletions(-) diff --git a/api/event_stream_test.go b/api/event_stream_test.go index e5f3492da4f..c2ea454d7e8 100644 --- a/api/event_stream_test.go +++ b/api/event_stream_test.go @@ -167,7 +167,7 @@ func TestEventStream_PayloadValue(t *testing.T) { require.Contains(t, raw, "Node") rawNode := raw["Node"] require.Equal(t, n.ID, rawNode["ID"]) - require.NotContains(t, rawNode, "SecretID") + require.Empty(t, rawNode["SecretID"]) } case <-time.After(5 * time.Second): require.Fail(t, "failed waiting for event stream event") diff --git a/command/agent/node_endpoint_test.go b/command/agent/node_endpoint_test.go index bfb52e81c06..0b57e65dd4c 100644 --- a/command/agent/node_endpoint_test.go +++ b/command/agent/node_endpoint_test.go @@ -257,8 +257,13 @@ func TestHTTP_NodeDrain(t *testing.T) { DrainSpec: &api.DrainSpec{ Deadline: 10 * time.Second, }, + Meta: map[string]string{ + "reason": "drain", + }, } + beforeDrain := time.Now().Add(-1 * time.Second) // handle roundoff + // Make the HTTP request buf := encodeReq(drainReq) req, err := http.NewRequest("POST", "/v1/node/"+node.ID+"/drain", buf) @@ -292,6 +297,11 @@ func TestHTTP_NodeDrain(t *testing.T) { require.Equal(structs.NodeSchedulingIneligible, out.SchedulingEligibility) } + require.NotNil(out.LastDrain) + require.Equal(map[string]string{ + "reason": "drain", + }, out.LastDrain.Meta) + // Make the HTTP request to unset drain drainReq.DrainSpec = nil buf = encodeReq(drainReq) @@ -306,6 +316,10 @@ func TestHTTP_NodeDrain(t *testing.T) { out, err = state.NodeByID(nil, node.ID) require.Nil(err) require.Nil(out.DrainStrategy) + require.NotNil(out.LastDrain) + require.False(out.LastDrain.StartedAt.Before(beforeDrain)) + require.False(out.LastDrain.UpdatedAt.Before(out.LastDrain.StartedAt)) + require.Equal(structs.DrainStatusCompleted, out.LastDrain.Status) }) } diff --git a/nomad/drainer/watch_nodes_test.go b/nomad/drainer/watch_nodes_test.go index 77be3d13b54..6484c175078 100644 --- a/nomad/drainer/watch_nodes_test.go +++ b/nomad/drainer/watch_nodes_test.go @@ -88,7 +88,7 @@ func TestNodeDrainWatcher_Remove(t *testing.T) { require.Equal(n, tracked[n.ID]) // Change the node to be not draining and wait for it to be untracked - require.Nil(state.UpdateNodeDrain(structs.MsgTypeTestSetup, 101, n.ID, nil, false, 0, nil)) + require.Nil(state.UpdateNodeDrain(structs.MsgTypeTestSetup, 101, n.ID, nil, false, 0, nil, nil, "")) testutil.WaitForResult(func() (bool, error) { return len(m.events()) == 2, nil }, func(err error) { @@ -166,7 +166,7 @@ func TestNodeDrainWatcher_Update(t *testing.T) { // Change the node to have a new spec s2 := n.DrainStrategy.Copy() s2.Deadline += time.Hour - require.Nil(state.UpdateNodeDrain(structs.MsgTypeTestSetup, 101, n.ID, s2, false, 0, nil)) + require.Nil(state.UpdateNodeDrain(structs.MsgTypeTestSetup, 101, n.ID, s2, false, 0, nil, nil, "")) // Wait for it to be updated testutil.WaitForResult(func() (bool, error) { diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 1431a1ceb5f..bdc3bbe745e 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -515,6 +515,8 @@ func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, } defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now()) + n.logger.Warn("Node.UpdateDrain info", "meta", args.Meta) + // Check node write permissions if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { return err @@ -801,6 +803,7 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest, // Setup the output if out != nil { + n.logger.Warn("Node.GetNode()", "LastDrain", out.LastDrain) out = out.Sanitize() reply.Node = out reply.Index = out.ModifyIndex diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 0480fc7300a..124f1dba969 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -997,70 +997,70 @@ func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, // Copy the existing node existingNode := existing.(*structs.Node) - copyNode := existingNode.Copy() - copyNode.StatusUpdatedAt = updatedAt + updatedNode := existingNode.Copy() + updatedNode.StatusUpdatedAt = updatedAt // Add the event if given if event != nil { - appendNodeEvents(index, copyNode, []*structs.NodeEvent{event}) + appendNodeEvents(index, updatedNode, []*structs.NodeEvent{event}) } // Update the drain in the copy - copyNode.DrainStrategy = drain + updatedNode.DrainStrategy = drain if drain != nil { - copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible + updatedNode.SchedulingEligibility = structs.NodeSchedulingIneligible } else if markEligible { - copyNode.SchedulingEligibility = structs.NodeSchedulingEligible + updatedNode.SchedulingEligibility = structs.NodeSchedulingEligible } // Update LastDrain updateTime := time.Unix(updatedAt, 0) - // when done with this method, copyNode.LastDrain should be set + // when done with this method, updatedNode.LastDrain should be set // this is either a new LastDrain struct or an update of the existing one // // if starting a new drain operation, create a new LastDrain. otherwise, update the existing one. // if already draining and LastDrain doesn't exist, we'll need to create a new one. // this might happen if we upgrade/transition to 1.1 during a drain operation - if existingNode.DrainStrategy == nil && copyNode.DrainStrategy != nil || - copyNode.LastDrain == nil { + if existingNode.DrainStrategy == nil && updatedNode.DrainStrategy != nil || + updatedNode.LastDrain == nil { - copyNode.LastDrain = &structs.DrainMetadata{ + updatedNode.LastDrain = &structs.DrainMetadata{ StartedAt: updateTime, UpdatedAt: updateTime, AccessorID: accessorId, Meta: drainMeta, } switch { - case drain != nil: - copyNode.LastDrain.Status = structs.DrainStatusDraining + case updatedNode.DrainStrategy != nil: + updatedNode.LastDrain.Status = structs.DrainStatusDraining case drainCompleted: - copyNode.LastDrain.Status = structs.DrainStatusCompleted + updatedNode.LastDrain.Status = structs.DrainStatusCompleted default: - copyNode.LastDrain.Status = structs.DrainStatusCancelled + updatedNode.LastDrain.Status = structs.DrainStatusCancelled } } else { - copyNode.LastDrain.UpdatedAt = updateTime + updatedNode.LastDrain.UpdatedAt = updateTime if accessorId != "" { // we won't have an accessor ID for drain complete; don't overwrite the existing one - copyNode.LastDrain.AccessorID = accessorId + updatedNode.LastDrain.AccessorID = accessorId } if drainMeta != nil { // similarly, won't have metadata for drain complete; keep the existing operator-provided metadata - copyNode.LastDrain.Meta = drainMeta + updatedNode.LastDrain.Meta = drainMeta } - if drain == nil { + if updatedNode.DrainStrategy == nil { if drainCompleted { - copyNode.LastDrain.Status = structs.DrainStatusCompleted + updatedNode.LastDrain.Status = structs.DrainStatusCompleted } else { - copyNode.LastDrain.Status = structs.DrainStatusCancelled + updatedNode.LastDrain.Status = structs.DrainStatusCancelled } } } - copyNode.ModifyIndex = index + updatedNode.ModifyIndex = index // Insert the node - if err := txn.Insert("nodes", copyNode); err != nil { + if err := txn.Insert("nodes", updatedNode); err != nil { return fmt.Errorf("node update failed: %v", err) } if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { From 00bc7a40a52401c2fbd7251b02b1f2e130871006 Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Mon, 29 Mar 2021 22:53:37 +0000 Subject: [PATCH 04/19] updated tests --- command/agent/node_endpoint_test.go | 48 ++++++---- nomad/node_endpoint_test.go | 134 ++++++++++++++++++++++++++-- nomad/state/state_store.go | 4 +- nomad/structs/structs.go | 2 +- 4 files changed, 159 insertions(+), 29 deletions(-) diff --git a/command/agent/node_endpoint_test.go b/command/agent/node_endpoint_test.go index 0b57e65dd4c..098dcd59502 100644 --- a/command/agent/node_endpoint_test.go +++ b/command/agent/node_endpoint_test.go @@ -43,13 +43,13 @@ func TestHTTP_NodesList(t *testing.T) { } // Check for the index - if respW.HeaderMap.Get("X-Nomad-Index") == "" { + if respW.Header().Get("X-Nomad-Index") == "" { t.Fatalf("missing index") } - if respW.HeaderMap.Get("X-Nomad-KnownLeader") != "true" { + if respW.Header().Get("X-Nomad-KnownLeader") != "true" { t.Fatalf("missing known leader") } - if respW.HeaderMap.Get("X-Nomad-LastContact") == "" { + if respW.Header().Get("X-Nomad-LastContact") == "" { t.Fatalf("missing last contact") } @@ -100,13 +100,13 @@ func TestHTTP_NodesPrefixList(t *testing.T) { } // Check for the index - if respW.HeaderMap.Get("X-Nomad-Index") == "" { + if respW.Header().Get("X-Nomad-Index") == "" { t.Fatalf("missing index") } - if respW.HeaderMap.Get("X-Nomad-KnownLeader") != "true" { + if respW.Header().Get("X-Nomad-KnownLeader") != "true" { t.Fatalf("missing known leader") } - if respW.HeaderMap.Get("X-Nomad-LastContact") == "" { + if respW.Header().Get("X-Nomad-LastContact") == "" { t.Fatalf("missing last contact") } @@ -158,7 +158,7 @@ func TestHTTP_NodeForceEval(t *testing.T) { } // Check for the index - if respW.HeaderMap.Get("X-Nomad-Index") == "" { + if respW.Header().Get("X-Nomad-Index") == "" { t.Fatalf("missing index") } @@ -218,13 +218,13 @@ func TestHTTP_NodeAllocations(t *testing.T) { } // Check for the index - if respW.HeaderMap.Get("X-Nomad-Index") == "" { + if respW.Header().Get("X-Nomad-Index") == "" { t.Fatalf("missing index") } - if respW.HeaderMap.Get("X-Nomad-KnownLeader") != "true" { + if respW.Header().Get("X-Nomad-KnownLeader") != "true" { t.Fatalf("missing known leader") } - if respW.HeaderMap.Get("X-Nomad-LastContact") == "" { + if respW.Header().Get("X-Nomad-LastContact") == "" { t.Fatalf("missing last contact") } @@ -275,13 +275,13 @@ func TestHTTP_NodeDrain(t *testing.T) { require.Nil(err) // Check for the index - require.NotZero(respW.HeaderMap.Get("X-Nomad-Index")) + require.NotEmpty(respW.Header().Get("X-Nomad-Index")) // Check the response dresp, ok := obj.(structs.NodeDrainUpdateResponse) require.True(ok) - t.Logf("response index=%v node_update_index=0x%x", respW.HeaderMap.Get("X-Nomad-Index"), + t.Logf("response index=%v node_update_index=0x%x", respW.Header().Get("X-Nomad-Index"), dresp.NodeModifyIndex) // Check that the node has been updated @@ -304,6 +304,9 @@ func TestHTTP_NodeDrain(t *testing.T) { // Make the HTTP request to unset drain drainReq.DrainSpec = nil + drainReq.Meta = map[string]string{ + "cancel_reason": "changed my mind", + } buf = encodeReq(drainReq) req, err = http.NewRequest("POST", "/v1/node/"+node.ID+"/drain", buf) require.Nil(err) @@ -319,7 +322,16 @@ func TestHTTP_NodeDrain(t *testing.T) { require.NotNil(out.LastDrain) require.False(out.LastDrain.StartedAt.Before(beforeDrain)) require.False(out.LastDrain.UpdatedAt.Before(out.LastDrain.StartedAt)) - require.Equal(structs.DrainStatusCompleted, out.LastDrain.Status) + require.Contains([]string{structs.DrainStatusCancelled, structs.DrainStatusComplete}, out.LastDrain.Status) + if out.LastDrain.Status == structs.DrainStatusComplete { + require.Equal(map[string]string{ + "reason": "drain", + }, out.LastDrain.Meta) + } else if out.LastDrain.Status == structs.DrainStatusCancelled { + require.Equal(map[string]string{ + "cancel_reason": "changed my mind", + }, out.LastDrain.Meta) + } }) } @@ -351,7 +363,7 @@ func TestHTTP_NodeEligible(t *testing.T) { require.Nil(err) // Check for the index - require.NotZero(respW.HeaderMap.Get("X-Nomad-Index")) + require.NotZero(respW.Header().Get("X-Nomad-Index")) // Check the response _, ok := obj.(structs.NodeEligibilityUpdateResponse) @@ -417,7 +429,7 @@ func TestHTTP_NodePurge(t *testing.T) { } // Check for the index - if respW.HeaderMap.Get("X-Nomad-Index") == "" { + if respW.Header().Get("X-Nomad-Index") == "" { t.Fatalf("missing index") } @@ -470,13 +482,13 @@ func TestHTTP_NodeQuery(t *testing.T) { } // Check for the index - if respW.HeaderMap.Get("X-Nomad-Index") == "" { + if respW.Header().Get("X-Nomad-Index") == "" { t.Fatalf("missing index") } - if respW.HeaderMap.Get("X-Nomad-KnownLeader") != "true" { + if respW.Header().Get("X-Nomad-KnownLeader") != "true" { t.Fatalf("missing known leader") } - if respW.HeaderMap.Get("X-Nomad-LastContact") == "" { + if respW.Header().Get("X-Nomad-LastContact") == "" { t.Fatalf("missing last contact") } diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 95c29744245..3db2445a8d6 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -903,10 +903,8 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { dereg := &structs.NodeUpdateDrainRequest{ NodeID: node.ID, DrainStrategy: strategy, - Meta: map[string]string{ - "message": "this node looks funny", - }, - WriteRequest: structs.WriteRequest{Region: "global"}, + Meta: map[string]string{"message": "this node is not needed"}, + WriteRequest: structs.WriteRequest{Region: "global"}, } var resp2 structs.NodeDrainUpdateResponse require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp2)) @@ -922,10 +920,12 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { require.Len(out.Events, 2) require.Equal(NodeDrainEventDrainSet, out.Events[1].Message) require.NotNil(out.LastDrain) - require.Equal(structs.DrainStatusDraining, out.LastDrain.Status) - require.True(out.LastDrain.StartedAt.Before(time.Now())) - require.Equal(out.LastDrain.StartedAt, out.LastDrain.UpdatedAt) - require.Equal("this node looks funny", out.LastDrain.Meta["message"]) + require.Equal(structs.DrainMetadata{ + StartedAt: out.LastDrain.UpdatedAt, + UpdatedAt: out.LastDrain.StartedAt, + Status: structs.DrainStatusDraining, + Meta: map[string]string{"message": "this node is not needed"}, + }, *out.LastDrain) // before+deadline should be before the forced deadline require.True(beforeUpdate.Add(strategy.Deadline).Before(out.DrainStrategy.ForceDeadline)) @@ -951,6 +951,7 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { // Update the eligibility and expect evals dereg.DrainStrategy = nil dereg.MarkEligible = true + dereg.Meta = map[string]string{"cancelled": "yes"} var resp3 structs.NodeDrainUpdateResponse require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp3)) require.NotZero(resp3.Index) @@ -963,6 +964,15 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { require.NoError(err) require.Len(out.Events, 4) require.Equal(NodeDrainEventDrainDisabled, out.Events[3].Message) + require.NotNil(out.LastDrain) + require.NotNil(out.LastDrain) + require.False(out.LastDrain.UpdatedAt.Before(out.LastDrain.StartedAt)) + require.Equal(structs.DrainMetadata{ + StartedAt: out.LastDrain.StartedAt, + UpdatedAt: out.LastDrain.UpdatedAt, + Status: structs.DrainStatusCancelled, + Meta: map[string]string{"cancelled": "yes"}, + }, *out.LastDrain) // Check that calling UpdateDrain with the same DrainStrategy does not emit // a node event. @@ -973,6 +983,110 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { require.Len(out.Events, 4) } +func TestClientEndpoint_UpdatedDrainAndCompleted(t *testing.T) { + t.Parallel() + require := require.New(t) + + s1, cleanupS1 := TestServer(t, nil) + defer cleanupS1() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + state := s1.fsm.State() + + // Disable drainer for now + s1.nodeDrainer.SetEnabled(false, nil) + + // Create the register request + node := mock.Node() + reg := &structs.NodeRegisterRequest{ + Node: node, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + + // Fetch the response + var resp structs.NodeUpdateResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp)) + + strategy := &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: 10 * time.Second, + }, + } + + // Update the status + dereg := &structs.NodeUpdateDrainRequest{ + NodeID: node.ID, + DrainStrategy: strategy, + Meta: map[string]string{ + "message": "first", + }, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var resp2 structs.NodeDrainUpdateResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp2)) + require.NotZero(resp2.Index) + + // Check for the node in the FSM + out, err := state.NodeByID(nil, node.ID) + require.Nil(err) + require.NotNil(out.DrainStrategy) + require.NotNil(out.LastDrain) + firstDrainUpdate := out.LastDrain.UpdatedAt + require.Equal(structs.DrainMetadata{ + StartedAt: firstDrainUpdate, + UpdatedAt: firstDrainUpdate, + Status: structs.DrainStatusDraining, + Meta: map[string]string{"message": "first"}, + }, *out.LastDrain) + + time.Sleep(1 * time.Second) + + // Update the drain + dereg.DrainStrategy.DrainSpec.Deadline *= 2 + dereg.Meta["message"] = "second" + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp2)) + require.NotZero(resp2.Index) + + out, err = state.NodeByID(nil, node.ID) + require.Nil(err) + require.NotNil(out.DrainStrategy) + require.NotNil(out.LastDrain) + secondDrainUpdate := out.LastDrain.UpdatedAt + require.True(secondDrainUpdate.After(firstDrainUpdate)) + require.Equal(structs.DrainMetadata{ + StartedAt: firstDrainUpdate, + UpdatedAt: secondDrainUpdate, + Status: structs.DrainStatusDraining, + Meta: map[string]string{"message": "second"}, + }, *out.LastDrain) + + time.Sleep(1 * time.Second) + + // Enable the drainer, wait for completion + s1.nodeDrainer.SetEnabled(true, state) + + testutil.WaitForResult(func() (bool, error) { + out, err = state.NodeByID(nil, node.ID) + if err != nil { + return false, err + } + if out == nil { + return false, fmt.Errorf("could not find node") + } + return out.DrainStrategy == nil, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + require.True(out.LastDrain.UpdatedAt.After(secondDrainUpdate)) + require.Equal(structs.DrainMetadata{ + StartedAt: firstDrainUpdate, + UpdatedAt: out.LastDrain.UpdatedAt, + Status: structs.DrainStatusComplete, + Meta: map[string]string{"message": "second"}, + }, *out.LastDrain) +} + func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { t.Parallel() @@ -1029,10 +1143,14 @@ func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { } // Try with a root token + dereg.DrainStrategy.DrainSpec.Deadline = 20 * time.Second dereg.AuthToken = root.SecretID { var resp structs.NodeDrainUpdateResponse require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp), "RPC") + out, err := state.NodeByID(nil, node.ID) + require.NoError(err) + require.Equal(root.AccessorID, out.LastDrain.AccessorID) } } diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 124f1dba969..1aefc222bd4 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -1034,7 +1034,7 @@ func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, case updatedNode.DrainStrategy != nil: updatedNode.LastDrain.Status = structs.DrainStatusDraining case drainCompleted: - updatedNode.LastDrain.Status = structs.DrainStatusCompleted + updatedNode.LastDrain.Status = structs.DrainStatusComplete default: updatedNode.LastDrain.Status = structs.DrainStatusCancelled } @@ -1050,7 +1050,7 @@ func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, } if updatedNode.DrainStrategy == nil { if drainCompleted { - updatedNode.LastDrain.Status = structs.DrainStatusCompleted + updatedNode.LastDrain.Status = structs.DrainStatusComplete } else { updatedNode.LastDrain.Status = structs.DrainStatusCancelled } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 9678af53e01..5a71dc07290 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1769,7 +1769,7 @@ func (d *DrainStrategy) Equal(o *DrainStrategy) bool { const ( // DrainStatuses are the various states a drain can be in, as reflect in DrainMetadata DrainStatusDraining = "draining" - DrainStatusCompleted = "complete" + DrainStatusComplete = "complete" DrainStatusCancelled = "cancelled" ) From 8bccba049b301d6aa28fcf577f3dd2d3bdeb54e5 Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Tue, 30 Mar 2021 16:03:55 +0000 Subject: [PATCH 05/19] added node drain metadata to drain CLI command, tweaked SDK interface --- api/nodes.go | 4 +- command/node_drain.go | 52 +++++++++++++++++-- .../github.com/hashicorp/nomad/api/nodes.go | 4 +- 3 files changed, 52 insertions(+), 8 deletions(-) diff --git a/api/nodes.go b/api/nodes.go index 14f58c776ea..18a4e09bea3 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -105,7 +105,7 @@ type DrainOptions struct { // markEligible is true and the drain is being removed, the node will be marked // as having its scheduling being eligible func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q *WriteOptions) (*NodeDrainUpdateResponse, error) { - resp, err := n.UpdateDrainOpts(nodeID, DrainOptions{ + resp, err := n.UpdateDrainOpts(nodeID, &DrainOptions{ DrainSpec: spec, MarkEligible: markEligible, Meta: nil, @@ -116,7 +116,7 @@ func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q // UpdateDrainWithMeta is used to update the drain strategy for a given node. If // markEligible is true and the drain is being removed, the node will be marked // as having its scheduling being eligible -func (n *Nodes) UpdateDrainOpts(nodeID string, opts DrainOptions, q *WriteOptions) (*NodeDrainUpdateResponse, +func (n *Nodes) UpdateDrainOpts(nodeID string, opts *DrainOptions, q *WriteOptions) (*NodeDrainUpdateResponse, error) { req := &NodeUpdateDrainRequest{ NodeID: nodeID, diff --git a/command/node_drain.go b/command/node_drain.go index 433be8fe75e..480fd8942e6 100644 --- a/command/node_drain.go +++ b/command/node_drain.go @@ -8,6 +8,8 @@ import ( "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/api/contexts" + flaghelper "github.com/hashicorp/nomad/helper/flags" + "github.com/posener/complete" ) @@ -71,6 +73,13 @@ Node Drain Options: the drain is being disabled. This is useful when an existing drain is being cancelled but additional scheduling on the node is not desired. + -m + Message for the drain update operation. Registered in drain metadata as + "message" for drain enable and "cancel_message" for drain disable. + + --meta = + Custom metadata to store on thed drain operation, can be used multiple times. + -self Set the drain status of the local node. @@ -95,6 +104,8 @@ func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags { "-no-deadline": complete.PredictNothing, "-ignore-system": complete.PredictNothing, "-keep-ineligible": complete.PredictNothing, + "-m": complete.PredictNothing, + "--meta": complete.PredictNothing, "-self": complete.PredictNothing, "-yes": complete.PredictNothing, }) @@ -121,7 +132,8 @@ func (c *NodeDrainCommand) Run(args []string) int { var enable, disable, detach, force, noDeadline, ignoreSystem, keepIneligible, self, autoYes, monitor bool - var deadline string + var deadline, message string + var metaVars flaghelper.StringFlag flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } @@ -136,6 +148,8 @@ func (c *NodeDrainCommand) Run(args []string) int { flags.BoolVar(&self, "self", false, "") flags.BoolVar(&autoYes, "yes", false, "Automatic yes to prompts.") flags.BoolVar(&monitor, "monitor", false, "Monitor drain status.") + flags.StringVar(&message, "m", "", "Drain message") + flags.Var(&metaVars, "meta", "Drain metadata") if err := flags.Parse(args); err != nil { return 1 @@ -251,7 +265,7 @@ func (c *NodeDrainCommand) Run(args []string) int { return 1 } - // If monitoring the drain start the montior and return when done + // If monitoring the drain start the monitor and return when done if monitor { if node.DrainStrategy == nil { c.Ui.Warn("No drain strategy set") @@ -297,8 +311,38 @@ func (c *NodeDrainCommand) Run(args []string) int { } } + // fill drain metadata map, copy existing map if we're updating or cancelling drain + var drainMeta map[string]string + if node.DrainStrategy != nil && node.LastDrain != nil && node.LastDrain.Meta != nil { + drainMeta = node.LastDrain.Meta + } else { + drainMeta = make(map[string]string) + } + if message != "" { + if enable { + drainMeta["message"] = message + } else { + drainMeta["cancel_message"] = message + } + } + for _, m := range metaVars { + if len(m) == 0 { + continue + } + kv := strings.SplitN(m, "=", 2) + if len(kv) == 2 { + drainMeta[kv[0]] = kv[1] + } else { + drainMeta[kv[0]] = "" + } + } + // Toggle node draining - updateMeta, err := client.Nodes().UpdateDrain(node.ID, spec, !keepIneligible, nil) + drainResponse, err := client.Nodes().UpdateDrainOpts(node.ID, &api.DrainOptions{ + DrainSpec: spec, + MarkEligible: !keepIneligible, + Meta: drainMeta, + }, nil) if err != nil { c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err)) return 1 @@ -316,7 +360,7 @@ func (c *NodeDrainCommand) Run(args []string) int { now := time.Now() c.Ui.Info(fmt.Sprintf("%s: Ctrl-C to stop monitoring: will not cancel the node drain", formatTime(now))) c.Ui.Output(fmt.Sprintf("%s: Node %q drain strategy set", formatTime(now), node.ID)) - c.monitorDrain(client, context.Background(), node, updateMeta.LastIndex, ignoreSystem) + c.monitorDrain(client, context.Background(), node, drainResponse.LastIndex, ignoreSystem) } return 0 } diff --git a/vendor/github.com/hashicorp/nomad/api/nodes.go b/vendor/github.com/hashicorp/nomad/api/nodes.go index 14f58c776ea..18a4e09bea3 100644 --- a/vendor/github.com/hashicorp/nomad/api/nodes.go +++ b/vendor/github.com/hashicorp/nomad/api/nodes.go @@ -105,7 +105,7 @@ type DrainOptions struct { // markEligible is true and the drain is being removed, the node will be marked // as having its scheduling being eligible func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q *WriteOptions) (*NodeDrainUpdateResponse, error) { - resp, err := n.UpdateDrainOpts(nodeID, DrainOptions{ + resp, err := n.UpdateDrainOpts(nodeID, &DrainOptions{ DrainSpec: spec, MarkEligible: markEligible, Meta: nil, @@ -116,7 +116,7 @@ func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q // UpdateDrainWithMeta is used to update the drain strategy for a given node. If // markEligible is true and the drain is being removed, the node will be marked // as having its scheduling being eligible -func (n *Nodes) UpdateDrainOpts(nodeID string, opts DrainOptions, q *WriteOptions) (*NodeDrainUpdateResponse, +func (n *Nodes) UpdateDrainOpts(nodeID string, opts *DrainOptions, q *WriteOptions) (*NodeDrainUpdateResponse, error) { req := &NodeUpdateDrainRequest{ NodeID: nodeID, From 9132b3d848e2e15080c567d1efbfccc0ed4681fd Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Tue, 30 Mar 2021 20:48:37 +0000 Subject: [PATCH 06/19] better handling for drain no-ops --- nomad/node_endpoint.go | 3 -- nomad/node_endpoint_test.go | 81 +++++++++++++++++++++++++++++++++++-- nomad/state/state_store.go | 68 ++++++++++++++++--------------- 3 files changed, 113 insertions(+), 39 deletions(-) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index bdc3bbe745e..1431a1ceb5f 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -515,8 +515,6 @@ func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, } defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now()) - n.logger.Warn("Node.UpdateDrain info", "meta", args.Meta) - // Check node write permissions if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { return err @@ -803,7 +801,6 @@ func (n *Node) GetNode(args *structs.NodeSpecificRequest, // Setup the output if out != nil { - n.logger.Warn("Node.GetNode()", "LastDrain", out.LastDrain) out = out.Sanitize() reply.Node = out reply.Index = out.ModifyIndex diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 3db2445a8d6..a35515735eb 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -11,6 +11,11 @@ import ( memdb "github.com/hashicorp/go-memdb" msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" + vapi "github.com/hashicorp/vault/api" + "github.com/kr/pretty" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/hashicorp/nomad/acl" "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/helper" @@ -19,10 +24,6 @@ import ( "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" - vapi "github.com/hashicorp/vault/api" - "github.com/kr/pretty" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) func TestClientEndpoint_Register(t *testing.T) { @@ -1087,6 +1088,78 @@ func TestClientEndpoint_UpdatedDrainAndCompleted(t *testing.T) { }, *out.LastDrain) } +func TestClientEndpoint_UpdatedDrainNoop(t *testing.T) { + t.Parallel() + require := require.New(t) + + s1, cleanupS1 := TestServer(t, nil) + defer cleanupS1() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + state := s1.fsm.State() + + // Create the register request + node := mock.Node() + reg := &structs.NodeRegisterRequest{ + Node: node, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + + // Fetch the response + var resp structs.NodeUpdateResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp)) + + // Update the status + dereg := &structs.NodeUpdateDrainRequest{ + NodeID: node.ID, + DrainStrategy: &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: 10 * time.Second, + }, + }, + Meta: map[string]string{ + "message": "drain", + }, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var drainResp structs.NodeDrainUpdateResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &drainResp)) + require.NotZero(drainResp.Index) + + var out *structs.Node + testutil.WaitForResult(func() (bool, error) { + var err error + out, err = state.NodeByID(nil, node.ID) + if err != nil { + return false, err + } + if out == nil { + return false, fmt.Errorf("could not find node") + } + return out.DrainStrategy == nil && out.SchedulingEligibility == structs.NodeSchedulingIneligible, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + require.Equal(structs.DrainStatusComplete, out.LastDrain.Status) + require.Equal(map[string]string{"message": "drain"}, out.LastDrain.Meta) + prevDrain := out.LastDrain + + // call again with Drain Strategy nil; should be a no-op because drain is already complete + dereg.DrainStrategy = nil + dereg.Meta = map[string]string{ + "new_message": "is new", + } + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &drainResp)) + require.NotZero(drainResp.Index) + + out, err := state.NodeByID(nil, node.ID) + require.Nil(err) + require.Nil(out.DrainStrategy) + require.NotNil(out.LastDrain) + require.Equal(prevDrain, out.LastDrain) +} + func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { t.Parallel() diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 1aefc222bd4..8bc5d4bce7f 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -1015,45 +1015,49 @@ func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, // Update LastDrain updateTime := time.Unix(updatedAt, 0) + + // if drain strategy isn't before or after, this wasn't a drain operation + // in that case, we don't care about .LastDrain + drainNoop := existingNode.DrainStrategy == nil && updatedNode.DrainStrategy == nil // when done with this method, updatedNode.LastDrain should be set - // this is either a new LastDrain struct or an update of the existing one - // // if starting a new drain operation, create a new LastDrain. otherwise, update the existing one. - // if already draining and LastDrain doesn't exist, we'll need to create a new one. - // this might happen if we upgrade/transition to 1.1 during a drain operation - if existingNode.DrainStrategy == nil && updatedNode.DrainStrategy != nil || - updatedNode.LastDrain == nil { - - updatedNode.LastDrain = &structs.DrainMetadata{ - StartedAt: updateTime, - UpdatedAt: updateTime, - AccessorID: accessorId, - Meta: drainMeta, - } - switch { - case updatedNode.DrainStrategy != nil: - updatedNode.LastDrain.Status = structs.DrainStatusDraining - case drainCompleted: - updatedNode.LastDrain.Status = structs.DrainStatusComplete - default: - updatedNode.LastDrain.Status = structs.DrainStatusCancelled + startedDraining := existingNode.DrainStrategy == nil && updatedNode.DrainStrategy != nil + // if already draining and LastDrain doesn't exist, we need to create a new one. + missingLastDrain := updatedNode.LastDrain == nil + if !drainNoop { + if startedDraining { + updatedNode.LastDrain = &structs.DrainMetadata{ + StartedAt: updateTime, + Meta: drainMeta, + } + } else if missingLastDrain { + updatedNode.LastDrain = &structs.DrainMetadata{ + // we don't have sub-second accuracy, so truncate this + StartedAt: time.Unix(existingNode.DrainStrategy.StartedAt.Unix(), 0), + Status: structs.DrainStatusDraining, + Meta: drainMeta, + } } - } else { + updatedNode.LastDrain.UpdatedAt = updateTime - if accessorId != "" { - // we won't have an accessor ID for drain complete; don't overwrite the existing one - updatedNode.LastDrain.AccessorID = accessorId - } + + // won't have new metadata on drain complete; keep the existing operator-provided metadata + // also, keep existing if they didn't provide it if drainMeta != nil { - // similarly, won't have metadata for drain complete; keep the existing operator-provided metadata updatedNode.LastDrain.Meta = drainMeta } - if updatedNode.DrainStrategy == nil { - if drainCompleted { - updatedNode.LastDrain.Status = structs.DrainStatusComplete - } else { - updatedNode.LastDrain.Status = structs.DrainStatusCancelled - } + + // we won't have an accessor ID for drain complete; don't overwrite the existing one + if accessorId != "" { + updatedNode.LastDrain.AccessorID = accessorId + } + + if updatedNode.DrainStrategy != nil { + updatedNode.LastDrain.Status = structs.DrainStatusDraining + } else if drainCompleted { + updatedNode.LastDrain.Status = structs.DrainStatusComplete + } else { + updatedNode.LastDrain.Status = structs.DrainStatusCancelled } } From a7cbda4e7d7866f6c4ccfa1c5477466c29e51528 Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Tue, 30 Mar 2021 22:03:04 +0000 Subject: [PATCH 07/19] clean up metadata handling for node drain CLI --- api/nodes_test.go | 13 ++++++------- command/agent/node_endpoint_test.go | 2 +- command/node_drain.go | 19 +++++++++---------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/api/nodes_test.go b/api/nodes_test.go index ec0bd513714..589533d9b37 100644 --- a/api/nodes_test.go +++ b/api/nodes_test.go @@ -251,7 +251,7 @@ func TestNodes_ToggleDrain(t *testing.T) { drainMeta := map[string]string{ "reason": "this node needs to go", } - drainOut, err := nodes.UpdateDrainOpts(nodeID, DrainOptions{ + drainOut, err := nodes.UpdateDrainOpts(nodeID, &DrainOptions{ DrainSpec: spec, MarkEligible: false, Meta: drainMeta, @@ -269,8 +269,6 @@ func TestNodes_ToggleDrain(t *testing.T) { require.NoError(err) // we expect to see the node change to Drain:true and then back to Drain:false+ineligible - // sleep because of time round-off in LastDrain - time.Sleep(1 * time.Second) var sawDraining, sawDrainComplete uint64 for sawDrainComplete == 0 { select { @@ -285,12 +283,13 @@ func TestNodes_ToggleDrain(t *testing.T) { require.NotNil(node.LastDrain) require.Equal(DrainStatusDraining, node.LastDrain.Status) now := time.Now() - require.True(!node.LastDrain.StartedAt.Before(timeBeforeDrain) && !node.LastDrain.StartedAt.After(now), - "wanted %v <= %v <= %v", timeBeforeDrain, node.LastDrain.StartedAt, now) + require.False(node.LastDrain.StartedAt.Before(timeBeforeDrain), + "wanted %v <= %v", node.LastDrain.StartedAt, timeBeforeDrain) + require.False(node.LastDrain.StartedAt.After(now), + "wanted %v <= %v", node.LastDrain.StartedAt, now) require.Equal(drainMeta, node.LastDrain.Meta) sawDraining = node.ModifyIndex - } else if sawDraining != 0 && node.ModifyIndex > sawDraining && - !node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible { + } else if sawDraining != 0 && !node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible { require.NotNil(node.LastDrain) require.Equal(DrainStatusCompleted, node.LastDrain.Status) require.True(!node.LastDrain.UpdatedAt.Before(node.LastDrain.StartedAt)) diff --git a/command/agent/node_endpoint_test.go b/command/agent/node_endpoint_test.go index 098dcd59502..75832889e62 100644 --- a/command/agent/node_endpoint_test.go +++ b/command/agent/node_endpoint_test.go @@ -262,7 +262,7 @@ func TestHTTP_NodeDrain(t *testing.T) { }, } - beforeDrain := time.Now().Add(-1 * time.Second) // handle roundoff + beforeDrain := time.Unix(time.Now().Unix(), 0) // Make the HTTP request buf := encodeReq(drainReq) diff --git a/command/node_drain.go b/command/node_drain.go index 480fd8942e6..f4ee7ca8968 100644 --- a/command/node_drain.go +++ b/command/node_drain.go @@ -311,12 +311,10 @@ func (c *NodeDrainCommand) Run(args []string) int { } } - // fill drain metadata map, copy existing map if we're updating or cancelling drain - var drainMeta map[string]string - if node.DrainStrategy != nil && node.LastDrain != nil && node.LastDrain.Meta != nil { + // copy drain if cancelling and we have -m or -meta + drainMeta := make(map[string]string) + if disable && node.LastDrain != nil && node.LastDrain.Meta != nil { drainMeta = node.LastDrain.Meta - } else { - drainMeta = make(map[string]string) } if message != "" { if enable { @@ -338,11 +336,12 @@ func (c *NodeDrainCommand) Run(args []string) int { } // Toggle node draining - drainResponse, err := client.Nodes().UpdateDrainOpts(node.ID, &api.DrainOptions{ - DrainSpec: spec, - MarkEligible: !keepIneligible, - Meta: drainMeta, - }, nil) + drainResponse, err := client.Nodes().UpdateDrainOpts(node.ID, + &api.DrainOptions{ + DrainSpec: spec, + MarkEligible: !keepIneligible, + Meta: drainMeta, + }, nil) if err != nil { c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err)) return 1 From c9bf393fb38e48d3218e079045e25097693439d1 Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Tue, 30 Mar 2021 22:10:38 +0000 Subject: [PATCH 08/19] clarified some code around node drain in the state store --- nomad/state/state_store.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 8bc5d4bce7f..b07494fe4f7 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -1016,23 +1016,23 @@ func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, // Update LastDrain updateTime := time.Unix(updatedAt, 0) - // if drain strategy isn't before or after, this wasn't a drain operation + // if drain strategy isn't set before or after, this wasn't a drain operation // in that case, we don't care about .LastDrain drainNoop := existingNode.DrainStrategy == nil && updatedNode.DrainStrategy == nil - // when done with this method, updatedNode.LastDrain should be set + // otherwise, when done with this method, updatedNode.LastDrain should be set // if starting a new drain operation, create a new LastDrain. otherwise, update the existing one. startedDraining := existingNode.DrainStrategy == nil && updatedNode.DrainStrategy != nil - // if already draining and LastDrain doesn't exist, we need to create a new one. - missingLastDrain := updatedNode.LastDrain == nil if !drainNoop { if startedDraining { updatedNode.LastDrain = &structs.DrainMetadata{ StartedAt: updateTime, Meta: drainMeta, } - } else if missingLastDrain { + } else if updatedNode.LastDrain == nil { + // if already draining and LastDrain doesn't exist, we need to create a new one + // this could happen if we upgraded to 1.1.x during a drain updatedNode.LastDrain = &structs.DrainMetadata{ - // we don't have sub-second accuracy, so truncate this + // we don't have sub-second accuracy on these fields, so truncate this StartedAt: time.Unix(existingNode.DrainStrategy.StartedAt.Unix(), 0), Status: structs.DrainStatusDraining, Meta: drainMeta, @@ -1043,11 +1043,11 @@ func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, // won't have new metadata on drain complete; keep the existing operator-provided metadata // also, keep existing if they didn't provide it - if drainMeta != nil { + if len(drainMeta) != 0 { updatedNode.LastDrain.Meta = drainMeta } - // we won't have an accessor ID for drain complete; don't overwrite the existing one + // we won't have an accessor ID on drain complete, so don't overwrite the existing one if accessorId != "" { updatedNode.LastDrain.AccessorID = accessorId } From 5d91dda41cc05a4401ca51923cc87b6e23aaf621 Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Wed, 31 Mar 2021 11:30:46 +0000 Subject: [PATCH 09/19] changelog for #10250 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cecb6145679..8f77968c412 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ IMPROVEMENTS: * consul/connect: Added job-submission validation for Connect sidecar service and group names [[GH-10455](https://github.com/hashicorp/nomad/pull/10455)] * consul/connect: Automatically populate `CONSUL_HTTP_ADDR` for connect native tasks in host networking mode. [[GH-10239](https://github.com/hashicorp/nomad/issues/10239)] * consul/connect: Added `disable_default_tcp_check` field to `connect.sidecar_service` blocks to disable the default TCP listener check for Connect sidecar tasks. [[GH-10531](https://github.com/hashicorp/nomad/pull/10531)] + * core: Persist metadata about most recent drain in Node.LastDrain [[GH-10250](https://github.com/hashicorp/nomad/issues/10250)] * csi: Added support for jobs to request a unique volume ID per allocation. [[GH-10136](https://github.com/hashicorp/nomad/issues/10136)] * driver/docker: Added support for optional extra container labels. [[GH-9885](https://github.com/hashicorp/nomad/issues/9885)] * driver/docker: Added support for configuring default logger behavior in the client configuration. [[GH-10156](https://github.com/hashicorp/nomad/issues/10156)] From cb1c8daadf7dc4d8418401f9b7c4583dd85332ee Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Wed, 31 Mar 2021 11:48:22 +0000 Subject: [PATCH 10/19] corrected CLI documentation for `-meta` --- command/node_drain.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/command/node_drain.go b/command/node_drain.go index f4ee7ca8968..0911a68d8be 100644 --- a/command/node_drain.go +++ b/command/node_drain.go @@ -77,7 +77,7 @@ Node Drain Options: Message for the drain update operation. Registered in drain metadata as "message" for drain enable and "cancel_message" for drain disable. - --meta = + -meta = Custom metadata to store on thed drain operation, can be used multiple times. -self @@ -105,7 +105,7 @@ func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags { "-ignore-system": complete.PredictNothing, "-keep-ineligible": complete.PredictNothing, "-m": complete.PredictNothing, - "--meta": complete.PredictNothing, + "-meta": complete.PredictNothing, "-self": complete.PredictNothing, "-yes": complete.PredictNothing, }) From 25fb1cc10bbc88d984d84eb7b84169a83e2c48c2 Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Wed, 31 Mar 2021 12:21:56 +0000 Subject: [PATCH 11/19] node drain metadata documentation (API and CLI) some test comments --- command/node_drain.go | 4 +-- nomad/node_endpoint_test.go | 23 +++++++++--- website/content/api-docs/nodes.mdx | 38 +++++++++++++++++--- website/content/docs/commands/node/drain.mdx | 8 ++++- 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/command/node_drain.go b/command/node_drain.go index 0911a68d8be..caa34bf0700 100644 --- a/command/node_drain.go +++ b/command/node_drain.go @@ -75,10 +75,10 @@ Node Drain Options: -m Message for the drain update operation. Registered in drain metadata as - "message" for drain enable and "cancel_message" for drain disable. + "message" during drain enable and "cancel_message" during drain disable. -meta = - Custom metadata to store on thed drain operation, can be used multiple times. + Custom metadata to store on the drain operation, can be used multiple times. -self Set the drain status of the local node. diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index a35515735eb..5d5c323ccba 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -870,6 +870,10 @@ func TestClientEndpoint_UpdateStatus_HeartbeatOnly_Advertise(t *testing.T) { require.Equal(resp.Servers[0].RPCAdvertiseAddr, advAddr) } +// TestClientEndpoint_UpdateDrain asserts the ability to initiate drain +// against a node and cancel that drain. It also asserts: +// * an evaluation is created when the node becomes eligible +// * drain metadata is properly persisted in Node.LastDrain func TestClientEndpoint_UpdateDrain(t *testing.T) { t.Parallel() require := require.New(t) @@ -984,6 +988,9 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { require.Len(out.Events, 4) } +// TestClientEndpoint_UpdatedDrainAndCompleted asserts that drain metadata +// is properly persisted in Node.LastDrain as the node drain is updated and +// completes. func TestClientEndpoint_UpdatedDrainAndCompleted(t *testing.T) { t.Parallel() require := require.New(t) @@ -1019,7 +1026,7 @@ func TestClientEndpoint_UpdatedDrainAndCompleted(t *testing.T) { NodeID: node.ID, DrainStrategy: strategy, Meta: map[string]string{ - "message": "first", + "message": "first drain", }, WriteRequest: structs.WriteRequest{Region: "global"}, } @@ -1037,14 +1044,14 @@ func TestClientEndpoint_UpdatedDrainAndCompleted(t *testing.T) { StartedAt: firstDrainUpdate, UpdatedAt: firstDrainUpdate, Status: structs.DrainStatusDraining, - Meta: map[string]string{"message": "first"}, + Meta: map[string]string{"message": "first drain"}, }, *out.LastDrain) time.Sleep(1 * time.Second) // Update the drain dereg.DrainStrategy.DrainSpec.Deadline *= 2 - dereg.Meta["message"] = "second" + dereg.Meta["message"] = "second drain" require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp2)) require.NotZero(resp2.Index) @@ -1058,7 +1065,7 @@ func TestClientEndpoint_UpdatedDrainAndCompleted(t *testing.T) { StartedAt: firstDrainUpdate, UpdatedAt: secondDrainUpdate, Status: structs.DrainStatusDraining, - Meta: map[string]string{"message": "second"}, + Meta: map[string]string{"message": "second drain"}, }, *out.LastDrain) time.Sleep(1 * time.Second) @@ -1084,10 +1091,13 @@ func TestClientEndpoint_UpdatedDrainAndCompleted(t *testing.T) { StartedAt: firstDrainUpdate, UpdatedAt: out.LastDrain.UpdatedAt, Status: structs.DrainStatusComplete, - Meta: map[string]string{"message": "second"}, + Meta: map[string]string{"message": "second drain"}, }, *out.LastDrain) } +// TestClientEndpoint_UpdatedDrainNoop asserts that drain metadata is properly +// persisted in Node.LastDrain when calls to Node.UpdateDrain() don't affect +// the drain status. func TestClientEndpoint_UpdatedDrainNoop(t *testing.T) { t.Parallel() require := require.New(t) @@ -1160,6 +1170,9 @@ func TestClientEndpoint_UpdatedDrainNoop(t *testing.T) { require.Equal(prevDrain, out.LastDrain) } +// TestClientEndpoint_UpdateDrain_ACL asserts that Node.UpdateDrain() enforces +// node.write ACLs, and that token accessor ID is properly persisted in +// Node.LastDrain.AccessorID func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { t.Parallel() diff --git a/website/content/api-docs/nodes.mdx b/website/content/api-docs/nodes.mdx index 74656512f35..a01b1a0b868 100644 --- a/website/content/api-docs/nodes.mdx +++ b/website/content/api-docs/nodes.mdx @@ -112,6 +112,7 @@ $ curl \ } }, "ID": "f7476465-4d6e-c0de-26d0-e383c49be941", + "LastDrain": null, "ModifyIndex": 2526, "Name": "nomad-4", "NodeClass": "", @@ -180,7 +181,7 @@ $ curl \ "memory.totalbytes": "16571674624", "nomad.advertise.address": "127.0.0.1:4646", "nomad.revision": "30da2b8f6c3aa860113c9d313c695a05eff5bb97+CHANGES", - "nomad.version": "0.10.0-dev", + "nomad.version": "1.1.0", "os.name": "nixos", "os.signals": "SIGTTOU,SIGTTIN,SIGSTOP,SIGSYS,SIGXCPU,SIGBUS,SIGKILL,SIGTERM,SIGIOT,SIGILL,SIGIO,SIGQUIT,SIGSEGV,SIGUSR1,SIGXFSZ,SIGCHLD,SIGUSR2,SIGURG,SIGFPE,SIGHUP,SIGINT,SIGPROF,SIGCONT,SIGALRM,SIGPIPE,SIGTRAP,SIGTSTP,SIGWINCH,SIGABRT", "os.version": "\"19.03.173017.85f820d6e41 (Koi)\"", @@ -261,11 +262,25 @@ $ curl \ }, "Events": [ { - "CreateIndex": 0, + "CreateIndex": 6, "Details": null, "Message": "Node registered", "Subsystem": "Cluster", - "Timestamp": "2019-08-26T12:22:50+02:00" + "Timestamp": "2021-03-31T12:11:39Z" + }, + { + "CreateIndex": 11, + "Details": null, + "Message": "Node drain strategy set", + "Subsystem": "Drain", + "Timestamp": "2021-03-31T12:12:20.213412Z" + }, + { + "CreateIndex": 12, + "Details": null, + "Message": "Node drain complete", + "Subsystem": "Drain", + "Timestamp": "2021-03-31T12:12:20.213639Z" } ], "HTTPAddr": "127.0.0.1:4646", @@ -282,6 +297,15 @@ $ curl \ } }, "ID": "1ac61e33-a465-2ace-f63f-cffa1285e7eb", + "LastDrain": { + "AccessorID": "4e1b7ce1-f8aa-d7ff-09f1-55c3a0fd3988", + "Meta": { + "message": "node maintenance" + }, + "StartedAt": "2021-03-31T12:12:20Z", + "Status": "complete", + "UpdatedAt": "2021-03-31T12:12:20Z" + }, "Links": { "consul": "dc1.mew" }, @@ -289,7 +313,7 @@ $ curl \ "connect.log_level": "info", "connect.sidecar_image": "envoyproxy/envoy:v1.11.1" }, - "ModifyIndex": 9, + "ModifyIndex": 14, "Name": "mew", "NodeClass": "", "NodeResources": { @@ -929,6 +953,9 @@ The table below shows this endpoint's support for - `MarkEligible` `(bool: false)` - Specifies whether to mark a node as eligible for scheduling again when _disabling_ a drain. +- `Meta` `(json: )` - A JSON map of strings with drain operation + metadata that will be persisted in `.LastDrain.Meta`. + ### Sample Payload ```json @@ -936,6 +963,9 @@ The table below shows this endpoint's support for "DrainSpec": { "Deadline": 3600000000000, "IgnoreSystemJobs": true + }, + "Meta": { + "message": "drain for maintenance" } } ``` diff --git a/website/content/docs/commands/node/drain.mdx b/website/content/docs/commands/node/drain.mdx index 7eac8ae9cd5..171304bed2e 100644 --- a/website/content/docs/commands/node/drain.mdx +++ b/website/content/docs/commands/node/drain.mdx @@ -79,6 +79,12 @@ capability. existing drain is being cancelled but additional scheduling on the node is not desired. +- `-m`: Message for the drain update operation. Registered in drain metadata as + `"message"` during drain enable and `"cancel_message"` during drain disable. + +- `-meta =`: Custom metadata to store on the drain operation, can be + used multiple times. + - `-self`: Drain the local node. - `-yes`: Automatic yes to prompts. @@ -88,7 +94,7 @@ capability. Enable drain mode on node with ID prefix "4d2ba53b": ```shell-session -$ nomad node drain -enable f4e8a9e5 +$ nomad node drain -enable f4e8a9e5 -m "node maintenance" Are you sure you want to enable drain mode for node "f4e8a9e5-30d8-3536-1e6f-cda5c869c35e"? [y/N] y 2018-03-30T23:13:16Z: Ctrl-C to stop monitoring: will not cancel the node drain 2018-03-30T23:13:16Z: Node "f4e8a9e5-30d8-3536-1e6f-cda5c869c35e" drain strategy set From b83eae2cc236019436044e8eb63e191a10a94fef Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Thu, 1 Apr 2021 17:03:26 -0500 Subject: [PATCH 12/19] Update nomad/state/state_store_test.go --- nomad/state/state_store_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 3d9c217f82f..4c4706e496d 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -965,7 +965,6 @@ func TestStateStore_BatchUpdateNodeDrain(t *testing.T) { require.Nil(err) require.NotNil(out.DrainStrategy) require.Equal(out.DrainStrategy, expectedDrain) - // TODO: cgbaker: more tests for LastDrain require.NotNil(out.LastDrain) require.Equal(structs.DrainStatusDraining, out.LastDrain.Status) require.Len(out.Events, 2) From c5e8c7f301babefcfe8b64e1c7561af58902168b Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Thu, 1 Apr 2021 17:03:58 -0500 Subject: [PATCH 13/19] Update nomad/state/state_store_test.go --- nomad/state/state_store_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 4c4706e496d..5c89d2c6b0d 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -1010,7 +1010,6 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { out, err := state.NodeByID(ws, node.ID) require.Nil(err) require.NotNil(out.DrainStrategy) - // TODO: cgbaker: more tests for LastDrain require.NotNil(out.LastDrain) require.Equal(structs.DrainStatusDraining, out.LastDrain.Status) require.Equal(out.DrainStrategy, expectedDrain) From bb85f0700d2a907827fc6251396ef405fae3179a Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Thu, 1 Apr 2021 17:04:30 -0500 Subject: [PATCH 14/19] Update nomad/state/state_store_test.go --- nomad/state/state_store_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 5c89d2c6b0d..0856fedcc8e 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -1156,7 +1156,6 @@ func TestStateStore_UpdateNodeDrain_ResetEligiblity(t *testing.T) { require.Nil(err) require.Nil(out.DrainStrategy) require.Equal(out.SchedulingEligibility, structs.NodeSchedulingEligible) - // TODO: cgbaker: more tests for LastDrain require.NotNil(out.LastDrain) require.Equal(structs.DrainStatusCancelled, out.LastDrain.Status) require.Equal(time.Unix(7, 0), out.LastDrain.StartedAt) From a222ac6d03310d35955ab37b9b4eb5508e5c0743 Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Thu, 8 Apr 2021 10:24:21 -0500 Subject: [PATCH 15/19] Update command/node_drain.go --- command/node_drain.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/node_drain.go b/command/node_drain.go index caa34bf0700..38cab32840a 100644 --- a/command/node_drain.go +++ b/command/node_drain.go @@ -311,7 +311,7 @@ func (c *NodeDrainCommand) Run(args []string) int { } } - // copy drain if cancelling and we have -m or -meta + // propagate drain metadata if cancelling drainMeta := make(map[string]string) if disable && node.LastDrain != nil && node.LastDrain.Meta != nil { drainMeta = node.LastDrain.Meta From 6584699be7fbac64cf32e6df496b5907ba58848a Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Thu, 8 Apr 2021 10:31:59 -0500 Subject: [PATCH 16/19] Update nomad/state/state_store.go --- nomad/state/state_store.go | 1 - 1 file changed, 1 deletion(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index b07494fe4f7..4bccf2723bb 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -1034,7 +1034,6 @@ func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, updatedNode.LastDrain = &structs.DrainMetadata{ // we don't have sub-second accuracy on these fields, so truncate this StartedAt: time.Unix(existingNode.DrainStrategy.StartedAt.Unix(), 0), - Status: structs.DrainStatusDraining, Meta: drainMeta, } } From c504359607e165ad5f449c95078c9ecc8f22d56b Mon Sep 17 00:00:00 2001 From: Luiz Aoqui Date: Fri, 7 May 2021 11:35:59 -0400 Subject: [PATCH 17/19] set node drain status as a custom type --- api/nodes.go | 10 ++++++---- nomad/state/state_store.go | 2 +- nomad/structs/structs.go | 12 +++++++----- vendor/github.com/hashicorp/nomad/api/nodes.go | 10 ++++++---- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/api/nodes.go b/api/nodes.go index 18a4e09bea3..5eac71a8185 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -19,9 +19,9 @@ const ( NodeSchedulingEligible = "eligible" NodeSchedulingIneligible = "ineligible" - DrainStatusDraining = "draining" - DrainStatusCompleted = "complete" - DrainStatusCancelled = "cancelled" + DrainStatusDraining DrainStatus = "draining" + DrainStatusComplete DrainStatus = "complete" + DrainStatusCanceled DrainStatus = "canceled" ) // Nodes is used to query node-related API endpoints @@ -506,11 +506,13 @@ type HostVolumeInfo struct { ReadOnly bool } +type DrainStatus string + // DrainMetadata contains information about the most recent drain operation for a given Node. type DrainMetadata struct { StartedAt time.Time UpdatedAt time.Time - Status string + Status DrainStatus AccessorID string Meta map[string]string } diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 4bccf2723bb..eb4bc93d87e 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -1056,7 +1056,7 @@ func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, } else if drainCompleted { updatedNode.LastDrain.Status = structs.DrainStatusComplete } else { - updatedNode.LastDrain.Status = structs.DrainStatusCancelled + updatedNode.LastDrain.Status = structs.DrainStatusCanceled } } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 5a71dc07290..418d48e5d59 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1768,11 +1768,13 @@ func (d *DrainStrategy) Equal(o *DrainStrategy) bool { const ( // DrainStatuses are the various states a drain can be in, as reflect in DrainMetadata - DrainStatusDraining = "draining" - DrainStatusComplete = "complete" - DrainStatusCancelled = "cancelled" + DrainStatusDraining DrainStatus = "draining" + DrainStatusComplete DrainStatus = "complete" + DrainStatusCanceled DrainStatus = "canceled" ) +type DrainStatus string + // DrainMetadata contains information about the most recent drain operation for a given Node. type DrainMetadata struct { // StartedAt is the time that the drain operation started. This is equal to Node.DrainStrategy.StartedAt, @@ -1783,8 +1785,8 @@ type DrainMetadata struct { // or drain completion UpdatedAt time.Time - // Status reflects the status of the drain operation: "draining", "completed", "cancelled" - Status string + // Status reflects the status of the drain operation. + Status DrainStatus // AccessorID is the accessor ID of the ACL token used in the most recent API operation against this drain AccessorID string diff --git a/vendor/github.com/hashicorp/nomad/api/nodes.go b/vendor/github.com/hashicorp/nomad/api/nodes.go index 18a4e09bea3..5eac71a8185 100644 --- a/vendor/github.com/hashicorp/nomad/api/nodes.go +++ b/vendor/github.com/hashicorp/nomad/api/nodes.go @@ -19,9 +19,9 @@ const ( NodeSchedulingEligible = "eligible" NodeSchedulingIneligible = "ineligible" - DrainStatusDraining = "draining" - DrainStatusCompleted = "complete" - DrainStatusCancelled = "cancelled" + DrainStatusDraining DrainStatus = "draining" + DrainStatusComplete DrainStatus = "complete" + DrainStatusCanceled DrainStatus = "canceled" ) // Nodes is used to query node-related API endpoints @@ -506,11 +506,13 @@ type HostVolumeInfo struct { ReadOnly bool } +type DrainStatus string + // DrainMetadata contains information about the most recent drain operation for a given Node. type DrainMetadata struct { StartedAt time.Time UpdatedAt time.Time - Status string + Status DrainStatus AccessorID string Meta map[string]string } From cd4dddfff6c3a540a6c6690c36ac74c552bd7d09 Mon Sep 17 00:00:00 2001 From: Luiz Aoqui Date: Fri, 7 May 2021 11:56:57 -0400 Subject: [PATCH 18/19] fix test for node drain metadata --- api/nodes_test.go | 2 +- command/agent/node_endpoint_test.go | 4 ++-- nomad/node_endpoint_test.go | 2 +- nomad/state/state_store_test.go | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/api/nodes_test.go b/api/nodes_test.go index 589533d9b37..5dab49c8481 100644 --- a/api/nodes_test.go +++ b/api/nodes_test.go @@ -291,7 +291,7 @@ func TestNodes_ToggleDrain(t *testing.T) { sawDraining = node.ModifyIndex } else if sawDraining != 0 && !node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible { require.NotNil(node.LastDrain) - require.Equal(DrainStatusCompleted, node.LastDrain.Status) + require.Equal(DrainStatusComplete, node.LastDrain.Status) require.True(!node.LastDrain.UpdatedAt.Before(node.LastDrain.StartedAt)) require.Equal(drainMeta, node.LastDrain.Meta) sawDrainComplete = node.ModifyIndex diff --git a/command/agent/node_endpoint_test.go b/command/agent/node_endpoint_test.go index 75832889e62..144f219de21 100644 --- a/command/agent/node_endpoint_test.go +++ b/command/agent/node_endpoint_test.go @@ -322,12 +322,12 @@ func TestHTTP_NodeDrain(t *testing.T) { require.NotNil(out.LastDrain) require.False(out.LastDrain.StartedAt.Before(beforeDrain)) require.False(out.LastDrain.UpdatedAt.Before(out.LastDrain.StartedAt)) - require.Contains([]string{structs.DrainStatusCancelled, structs.DrainStatusComplete}, out.LastDrain.Status) + require.Contains([]string{structs.DrainStatusCanceled, structs.DrainStatusComplete}, out.LastDrain.Status) if out.LastDrain.Status == structs.DrainStatusComplete { require.Equal(map[string]string{ "reason": "drain", }, out.LastDrain.Meta) - } else if out.LastDrain.Status == structs.DrainStatusCancelled { + } else if out.LastDrain.Status == structs.DrainStatusCanceled { require.Equal(map[string]string{ "cancel_reason": "changed my mind", }, out.LastDrain.Meta) diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 5d5c323ccba..188fc50f590 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -975,7 +975,7 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { require.Equal(structs.DrainMetadata{ StartedAt: out.LastDrain.StartedAt, UpdatedAt: out.LastDrain.UpdatedAt, - Status: structs.DrainStatusCancelled, + Status: structs.DrainStatusCanceled, Meta: map[string]string{"cancelled": "yes"}, }, *out.LastDrain) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 0856fedcc8e..3d71a1dd66b 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -1157,7 +1157,7 @@ func TestStateStore_UpdateNodeDrain_ResetEligiblity(t *testing.T) { require.Nil(out.DrainStrategy) require.Equal(out.SchedulingEligibility, structs.NodeSchedulingEligible) require.NotNil(out.LastDrain) - require.Equal(structs.DrainStatusCancelled, out.LastDrain.Status) + require.Equal(structs.DrainStatusCanceled, out.LastDrain.Status) require.Equal(time.Unix(7, 0), out.LastDrain.StartedAt) require.Equal(time.Unix(9, 0), out.LastDrain.UpdatedAt) require.Len(out.Events, 3) From 180f8a58b90d1b9d654e5d02301146bd584ad32b Mon Sep 17 00:00:00 2001 From: Luiz Aoqui Date: Fri, 7 May 2021 12:58:52 -0400 Subject: [PATCH 19/19] more test fixes --- command/agent/node_endpoint_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/agent/node_endpoint_test.go b/command/agent/node_endpoint_test.go index 144f219de21..c5ec234be48 100644 --- a/command/agent/node_endpoint_test.go +++ b/command/agent/node_endpoint_test.go @@ -322,7 +322,7 @@ func TestHTTP_NodeDrain(t *testing.T) { require.NotNil(out.LastDrain) require.False(out.LastDrain.StartedAt.Before(beforeDrain)) require.False(out.LastDrain.UpdatedAt.Before(out.LastDrain.StartedAt)) - require.Contains([]string{structs.DrainStatusCanceled, structs.DrainStatusComplete}, out.LastDrain.Status) + require.Contains([]structs.DrainStatus{structs.DrainStatusCanceled, structs.DrainStatusComplete}, out.LastDrain.Status) if out.LastDrain.Status == structs.DrainStatusComplete { require.Equal(map[string]string{ "reason": "drain",