From 926f5ee004a4db4ffaeec48b67511f60374648d4 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Sat, 17 Sep 2016 11:28:02 -0700 Subject: [PATCH] Fix bug in which dead nodes weren't being properly handled by system scheduler --- scheduler/system_sched_test.go | 87 ++++++++++++++++++++++++++++++++++ scheduler/util.go | 23 +++++---- scheduler/util_test.go | 30 ++++++------ 3 files changed, 116 insertions(+), 24 deletions(-) diff --git a/scheduler/system_sched_test.go b/scheduler/system_sched_test.go index 664bfe60993..ba2d91dbe7e 100644 --- a/scheduler/system_sched_test.go +++ b/scheduler/system_sched_test.go @@ -1228,3 +1228,90 @@ func TestSystemSched_ChainedAlloc(t *testing.T) { t.Fatalf("expected: %v, actual: %v", 2, len(newAllocs)) } } + +func TestSystemSched_PlanWithDrainedNode(t *testing.T) { + h := NewHarness(t) + + // Register two nodes with two different classes + node := mock.Node() + node.NodeClass = "green" + node.Drain = true + node.ComputeClass() + noErr(t, h.State.UpsertNode(h.NextIndex(), node)) + + node2 := mock.Node() + node2.NodeClass = "blue" + node2.ComputeClass() + noErr(t, h.State.UpsertNode(h.NextIndex(), node2)) + + // Create a Job with two task groups, each constrianed on node class + job := mock.SystemJob() + tg1 := job.TaskGroups[0] + tg1.Constraints = append(tg1.Constraints, + &structs.Constraint{ + LTarget: "${node.class}", + RTarget: "green", + Operand: "==", + }) + + tg2 := tg1.Copy() + tg2.Name = "web2" + tg2.Constraints[0].RTarget = "blue" + job.TaskGroups = append(job.TaskGroups, tg2) + noErr(t, h.State.UpsertJob(h.NextIndex(), job)) + + // Create an allocation on each node + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = node.ID + alloc.Name = "my-job.web[0]" + alloc.TaskGroup = "web" + + alloc2 := mock.Alloc() + alloc2.Job = job + alloc2.JobID = job.ID + alloc2.NodeID = node2.ID + alloc2.Name = "my-job.web2[0]" + alloc2.TaskGroup = "web2" + noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc, alloc2})) + + // Create a mock evaluation to deal with drain + eval := &structs.Evaluation{ + ID: structs.GenerateUUID(), + Priority: 50, + TriggeredBy: structs.EvalTriggerNodeUpdate, + JobID: job.ID, + NodeID: node.ID, + } + + // Process the evaluation + err := h.Process(NewSystemScheduler, eval) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Ensure a single plan + if len(h.Plans) != 1 { + t.Fatalf("bad: %#v", h.Plans) + } + plan := h.Plans[0] + + // Ensure the plan evicted the alloc on the failed node + planned := plan.NodeUpdate[node.ID] + if len(planned) != 1 { + t.Fatalf("bad: %#v", plan) + } + + // Ensure the plan didn't place + if len(plan.NodeAllocation) != 0 { + t.Fatalf("bad: %#v", plan) + } + + // Ensure the allocations is stopped + if planned[0].DesiredStatus != structs.AllocDesiredStatusStop { + t.Fatalf("bad: %#v", planned[0]) + } + + h.AssertEvalStatus(t, structs.EvalStatusComplete) +} diff --git a/scheduler/util.go b/scheduler/util.go index 6218c1c9a20..9e924a27cb0 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -190,15 +190,20 @@ func diffSystemAllocs(job *structs.Job, nodes []*structs.Node, taintedNodes map[ for nodeID, allocs := range nodeAllocs { diff := diffAllocs(job, taintedNodes, required, allocs, terminalAllocs) - // Mark the alloc as being for a specific node. - for i := range diff.place { - alloc := &diff.place[i] - - // If the new allocation isn't annotated with a previous allocation - // or if the previous allocation isn't from the same node then we - // annotate the allocTuple with a new Allocation - if alloc.Alloc == nil || alloc.Alloc.NodeID != nodeID { - alloc.Alloc = &structs.Allocation{NodeID: nodeID} + // If the node is tainted there should be no placements made + if _, ok := taintedNodes[nodeID]; ok { + diff.place = nil + } else { + // Mark the alloc as being for a specific node. + for i := range diff.place { + alloc := &diff.place[i] + + // If the new allocation isn't annotated with a previous allocation + // or if the previous allocation isn't from the same node then we + // annotate the allocTuple with a new Allocation + if alloc.Alloc == nil || alloc.Alloc.NodeID != nodeID { + alloc.Alloc = &structs.Allocation{NodeID: nodeID} + } } } diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 2a8e11cdb6f..24a19bb3075 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -175,15 +175,6 @@ func TestDiffAllocs(t *testing.T) { func TestDiffSystemAllocs(t *testing.T) { job := mock.SystemJob() - // Create three alive nodes. - nodes := []*structs.Node{{ID: "foo"}, {ID: "bar"}, {ID: "baz"}, - {ID: "pipe"}} - - // The "old" job has a previous modify index - oldJob := new(structs.Job) - *oldJob = *job - oldJob.JobModifyIndex -= 1 - drainNode := mock.Node() drainNode.Drain = true @@ -191,10 +182,19 @@ func TestDiffSystemAllocs(t *testing.T) { deadNode.Status = structs.NodeStatusDown tainted := map[string]*structs.Node{ - "dead": deadNode, - "drainNode": drainNode, + deadNode.ID: deadNode, + drainNode.ID: drainNode, } + // Create three alive nodes. + nodes := []*structs.Node{{ID: "foo"}, {ID: "bar"}, {ID: "baz"}, + {ID: "pipe"}, {ID: drainNode.ID}, {ID: deadNode.ID}} + + // The "old" job has a previous modify index + oldJob := new(structs.Job) + *oldJob = *job + oldJob.JobModifyIndex -= 1 + allocs := []*structs.Allocation{ // Update allocation on baz &structs.Allocation{ @@ -215,14 +215,14 @@ func TestDiffSystemAllocs(t *testing.T) { // Stop allocation on draining node. &structs.Allocation{ ID: structs.GenerateUUID(), - NodeID: "drainNode", + NodeID: drainNode.ID, Name: "my-job.web[0]", Job: oldJob, }, // Mark as lost on a dead node &structs.Allocation{ ID: structs.GenerateUUID(), - NodeID: "dead", + NodeID: deadNode.ID, Name: "my-job.web[0]", Job: oldJob, }, @@ -272,8 +272,8 @@ func TestDiffSystemAllocs(t *testing.T) { } // We should place 1 - if len(place) != 2 { - t.Fatalf("bad: %#v", place) + if l := len(place); l != 2 { + t.Fatalf("bad: %#v", l) } // Ensure that the allocations which are replacements of terminal allocs are