Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Node Drain Metadata #10250

Merged
merged 19 commits into from
May 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ IMPROVEMENTS:
* consul/connect: Added job-submission validation for Connect sidecar service and group names [[GH-10455](https://github.com/hashicorp/nomad/pull/10455)]
* consul/connect: Automatically populate `CONSUL_HTTP_ADDR` for connect native tasks in host networking mode. [[GH-10239](https://github.com/hashicorp/nomad/issues/10239)]
* consul/connect: Added `disable_default_tcp_check` field to `connect.sidecar_service` blocks to disable the default TCP listener check for Connect sidecar tasks. [[GH-10531](https://github.com/hashicorp/nomad/pull/10531)]
* core: Persist metadata about most recent drain in Node.LastDrain [[GH-10250](https://github.com/hashicorp/nomad/issues/10250)]
* csi: Added support for jobs to request a unique volume ID per allocation. [[GH-10136](https://github.com/hashicorp/nomad/issues/10136)]
* driver/docker: Added support for optional extra container labels. [[GH-9885](https://github.com/hashicorp/nomad/issues/9885)]
* driver/docker: Added support for configuring default logger behavior in the client configuration. [[GH-10156](https://github.com/hashicorp/nomad/issues/10156)]
Expand Down
2 changes: 1 addition & 1 deletion api/event_stream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ func TestEventStream_PayloadValue(t *testing.T) {
require.Contains(t, raw, "Node")
rawNode := raw["Node"]
require.Equal(t, n.ID, rawNode["ID"])
require.NotContains(t, rawNode, "SecretID")
require.Empty(t, rawNode["SecretID"])
}
case <-time.After(5 * time.Second):
require.Fail(t, "failed waiting for event stream event")
Expand Down
55 changes: 53 additions & 2 deletions api/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ const (
// status being ready.
NodeSchedulingEligible = "eligible"
NodeSchedulingIneligible = "ineligible"

DrainStatusDraining DrainStatus = "draining"
DrainStatusComplete DrainStatus = "complete"
DrainStatusCanceled DrainStatus = "canceled"
)

// Nodes is used to query node-related API endpoints
Expand Down Expand Up @@ -67,6 +71,9 @@ type NodeUpdateDrainRequest struct {
// MarkEligible marks the node as eligible for scheduling if removing
// the drain strategy.
MarkEligible bool

// Meta allows operators to specify metadata related to the drain operation
Meta map[string]string
}

// NodeDrainUpdateResponse is used to respond to a node drain update
Expand All @@ -77,14 +84,45 @@ type NodeDrainUpdateResponse struct {
WriteMeta
}

// DrainOptions is used to pass through node drain parameters
type DrainOptions struct {
// DrainSpec contains the drain specification for the node. If non-nil,
// the node will be marked ineligible and begin/continue draining according
// to the provided drain spec.
// If nil, any existing drain operation will be canceled.
DrainSpec *DrainSpec

// MarkEligible indicates whether the node should be marked as eligible when
// canceling a drain operation.
MarkEligible bool

// Meta is metadata that is persisted in Node.LastDrain about this
// drain update.
Meta map[string]string
}

// UpdateDrain is used to update the drain strategy for a given node. If
// markEligible is true and the drain is being removed, the node will be marked
// as having its scheduling being eligible
func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q *WriteOptions) (*NodeDrainUpdateResponse, error) {
req := &NodeUpdateDrainRequest{
NodeID: nodeID,
resp, err := n.UpdateDrainOpts(nodeID, &DrainOptions{
DrainSpec: spec,
MarkEligible: markEligible,
Meta: nil,
}, q)
return resp, err
}

Copy link
Contributor Author

@cgbaker cgbaker Apr 1, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

needed a new method, followed the Jobs().RegisterOpts() pattern so we'll never need to do this again 🤞

// UpdateDrainWithMeta is used to update the drain strategy for a given node. If
// markEligible is true and the drain is being removed, the node will be marked
// as having its scheduling being eligible
func (n *Nodes) UpdateDrainOpts(nodeID string, opts *DrainOptions, q *WriteOptions) (*NodeDrainUpdateResponse,
error) {
req := &NodeUpdateDrainRequest{
NodeID: nodeID,
DrainSpec: opts.DrainSpec,
MarkEligible: opts.MarkEligible,
Meta: opts.Meta,
}

var resp NodeDrainUpdateResponse
Expand Down Expand Up @@ -468,6 +506,17 @@ type HostVolumeInfo struct {
ReadOnly bool
}

type DrainStatus string

// DrainMetadata contains information about the most recent drain operation for a given Node.
type DrainMetadata struct {
StartedAt time.Time
UpdatedAt time.Time
Status DrainStatus
AccessorID string
Meta map[string]string
}

// Node is used to deserialize a node entry.
type Node struct {
ID string
Expand All @@ -494,6 +543,7 @@ type Node struct {
HostVolumes map[string]*HostVolumeInfo
CSIControllerPlugins map[string]*CSIInfo
CSINodePlugins map[string]*CSIInfo
LastDrain *DrainMetadata
CreateIndex uint64
ModifyIndex uint64
}
Expand Down Expand Up @@ -789,6 +839,7 @@ type NodeListStub struct {
Drivers map[string]*DriverInfo
NodeResources *NodeResources `json:",omitempty"`
ReservedResources *NodeReservedResources `json:",omitempty"`
LastDrain *DrainMetadata
CreateIndex uint64
ModifyIndex uint64
}
Expand Down
26 changes: 23 additions & 3 deletions api/nodes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,12 +241,21 @@ func TestNodes_ToggleDrain(t *testing.T) {
out, _, err := nodes.Info(nodeID, nil)
require.Nil(err)
require.False(out.Drain)
require.Nil(out.LastDrain)

// Toggle it on
timeBeforeDrain := time.Now().Add(-1 * time.Second)
spec := &DrainSpec{
Deadline: 10 * time.Second,
}
drainOut, err := nodes.UpdateDrain(nodeID, spec, false, nil)
drainMeta := map[string]string{
"reason": "this node needs to go",
}
drainOut, err := nodes.UpdateDrainOpts(nodeID, &DrainOptions{
DrainSpec: spec,
MarkEligible: false,
Meta: drainMeta,
}, nil)
require.Nil(err)
assertWriteMeta(t, &drainOut.WriteMeta)

Expand All @@ -271,9 +280,20 @@ func TestNodes_ToggleDrain(t *testing.T) {
require.Equal(node.DrainStrategy != nil, node.Drain)
require.True(!node.Drain || node.SchedulingEligibility == NodeSchedulingIneligible) // node.Drain => "ineligible"
if node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible {
require.NotNil(node.LastDrain)
require.Equal(DrainStatusDraining, node.LastDrain.Status)
now := time.Now()
require.False(node.LastDrain.StartedAt.Before(timeBeforeDrain),
"wanted %v <= %v", node.LastDrain.StartedAt, timeBeforeDrain)
require.False(node.LastDrain.StartedAt.After(now),
"wanted %v <= %v", node.LastDrain.StartedAt, now)
require.Equal(drainMeta, node.LastDrain.Meta)
sawDraining = node.ModifyIndex
} else if sawDraining != 0 && node.ModifyIndex > sawDraining &&
!node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible {
} else if sawDraining != 0 && !node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible {
require.NotNil(node.LastDrain)
require.Equal(DrainStatusComplete, node.LastDrain.Status)
require.True(!node.LastDrain.UpdatedAt.Before(node.LastDrain.StartedAt))
require.Equal(drainMeta, node.LastDrain.Meta)
sawDrainComplete = node.ModifyIndex
}
}
Expand Down
1 change: 1 addition & 0 deletions command/agent/node_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ func (s *HTTPServer) nodeToggleDrain(resp http.ResponseWriter, req *http.Request
args := structs.NodeUpdateDrainRequest{
NodeID: nodeID,
MarkEligible: drainRequest.MarkEligible,
Meta: drainRequest.Meta,
}
if drainRequest.DrainSpec != nil {
args.DrainStrategy = &structs.DrainStrategy{
Expand Down
60 changes: 43 additions & 17 deletions command/agent/node_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ func TestHTTP_NodesList(t *testing.T) {
}

// Check for the index
if respW.HeaderMap.Get("X-Nomad-Index") == "" {
if respW.Header().Get("X-Nomad-Index") == "" {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all of these are because HeaderMap is deprecated, thought I'd clean up while I was in here

t.Fatalf("missing index")
}
if respW.HeaderMap.Get("X-Nomad-KnownLeader") != "true" {
if respW.Header().Get("X-Nomad-KnownLeader") != "true" {
t.Fatalf("missing known leader")
}
if respW.HeaderMap.Get("X-Nomad-LastContact") == "" {
if respW.Header().Get("X-Nomad-LastContact") == "" {
t.Fatalf("missing last contact")
}

Expand Down Expand Up @@ -100,13 +100,13 @@ func TestHTTP_NodesPrefixList(t *testing.T) {
}

// Check for the index
if respW.HeaderMap.Get("X-Nomad-Index") == "" {
if respW.Header().Get("X-Nomad-Index") == "" {
t.Fatalf("missing index")
}
if respW.HeaderMap.Get("X-Nomad-KnownLeader") != "true" {
if respW.Header().Get("X-Nomad-KnownLeader") != "true" {
t.Fatalf("missing known leader")
}
if respW.HeaderMap.Get("X-Nomad-LastContact") == "" {
if respW.Header().Get("X-Nomad-LastContact") == "" {
t.Fatalf("missing last contact")
}

Expand Down Expand Up @@ -158,7 +158,7 @@ func TestHTTP_NodeForceEval(t *testing.T) {
}

// Check for the index
if respW.HeaderMap.Get("X-Nomad-Index") == "" {
if respW.Header().Get("X-Nomad-Index") == "" {
t.Fatalf("missing index")
}

Expand Down Expand Up @@ -218,13 +218,13 @@ func TestHTTP_NodeAllocations(t *testing.T) {
}

// Check for the index
if respW.HeaderMap.Get("X-Nomad-Index") == "" {
if respW.Header().Get("X-Nomad-Index") == "" {
t.Fatalf("missing index")
}
if respW.HeaderMap.Get("X-Nomad-KnownLeader") != "true" {
if respW.Header().Get("X-Nomad-KnownLeader") != "true" {
t.Fatalf("missing known leader")
}
if respW.HeaderMap.Get("X-Nomad-LastContact") == "" {
if respW.Header().Get("X-Nomad-LastContact") == "" {
t.Fatalf("missing last contact")
}

Expand Down Expand Up @@ -257,8 +257,13 @@ func TestHTTP_NodeDrain(t *testing.T) {
DrainSpec: &api.DrainSpec{
Deadline: 10 * time.Second,
},
Meta: map[string]string{
"reason": "drain",
},
}

beforeDrain := time.Unix(time.Now().Unix(), 0)

// Make the HTTP request
buf := encodeReq(drainReq)
req, err := http.NewRequest("POST", "/v1/node/"+node.ID+"/drain", buf)
Expand All @@ -270,13 +275,13 @@ func TestHTTP_NodeDrain(t *testing.T) {
require.Nil(err)

// Check for the index
require.NotZero(respW.HeaderMap.Get("X-Nomad-Index"))
require.NotEmpty(respW.Header().Get("X-Nomad-Index"))

// Check the response
dresp, ok := obj.(structs.NodeDrainUpdateResponse)
require.True(ok)

t.Logf("response index=%v node_update_index=0x%x", respW.HeaderMap.Get("X-Nomad-Index"),
t.Logf("response index=%v node_update_index=0x%x", respW.Header().Get("X-Nomad-Index"),
dresp.NodeModifyIndex)

// Check that the node has been updated
Expand All @@ -292,8 +297,16 @@ func TestHTTP_NodeDrain(t *testing.T) {
require.Equal(structs.NodeSchedulingIneligible, out.SchedulingEligibility)
}

require.NotNil(out.LastDrain)
require.Equal(map[string]string{
"reason": "drain",
}, out.LastDrain.Meta)

// Make the HTTP request to unset drain
drainReq.DrainSpec = nil
drainReq.Meta = map[string]string{
"cancel_reason": "changed my mind",
}
buf = encodeReq(drainReq)
req, err = http.NewRequest("POST", "/v1/node/"+node.ID+"/drain", buf)
require.Nil(err)
Expand All @@ -306,6 +319,19 @@ func TestHTTP_NodeDrain(t *testing.T) {
out, err = state.NodeByID(nil, node.ID)
require.Nil(err)
require.Nil(out.DrainStrategy)
require.NotNil(out.LastDrain)
require.False(out.LastDrain.StartedAt.Before(beforeDrain))
require.False(out.LastDrain.UpdatedAt.Before(out.LastDrain.StartedAt))
require.Contains([]structs.DrainStatus{structs.DrainStatusCanceled, structs.DrainStatusComplete}, out.LastDrain.Status)
if out.LastDrain.Status == structs.DrainStatusComplete {
require.Equal(map[string]string{
"reason": "drain",
}, out.LastDrain.Meta)
} else if out.LastDrain.Status == structs.DrainStatusCanceled {
require.Equal(map[string]string{
"cancel_reason": "changed my mind",
}, out.LastDrain.Meta)
}
})
}

Expand Down Expand Up @@ -337,7 +363,7 @@ func TestHTTP_NodeEligible(t *testing.T) {
require.Nil(err)

// Check for the index
require.NotZero(respW.HeaderMap.Get("X-Nomad-Index"))
require.NotZero(respW.Header().Get("X-Nomad-Index"))

// Check the response
_, ok := obj.(structs.NodeEligibilityUpdateResponse)
Expand Down Expand Up @@ -403,7 +429,7 @@ func TestHTTP_NodePurge(t *testing.T) {
}

// Check for the index
if respW.HeaderMap.Get("X-Nomad-Index") == "" {
if respW.Header().Get("X-Nomad-Index") == "" {
t.Fatalf("missing index")
}

Expand Down Expand Up @@ -456,13 +482,13 @@ func TestHTTP_NodeQuery(t *testing.T) {
}

// Check for the index
if respW.HeaderMap.Get("X-Nomad-Index") == "" {
if respW.Header().Get("X-Nomad-Index") == "" {
t.Fatalf("missing index")
}
if respW.HeaderMap.Get("X-Nomad-KnownLeader") != "true" {
if respW.Header().Get("X-Nomad-KnownLeader") != "true" {
t.Fatalf("missing known leader")
}
if respW.HeaderMap.Get("X-Nomad-LastContact") == "" {
if respW.Header().Get("X-Nomad-LastContact") == "" {
t.Fatalf("missing last contact")
}

Expand Down
Loading