From 251bcf106f28189836298e4a06dbae92b4db63ad Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 4 Jan 2016 12:07:33 -0800 Subject: [PATCH 1/3] Store the available nodes in the alloc metric --- nomad/structs/structs.go | 3 +++ scheduler/generic_sched.go | 5 ++++- scheduler/system_sched.go | 8 ++++++-- scheduler/util.go | 14 ++++++++------ scheduler/util_test.go | 8 +++++++- 5 files changed, 28 insertions(+), 10 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index d28c23e4432..df75d62781c 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1695,6 +1695,9 @@ type AllocMetric struct { // NodesFiltered is the number of nodes filtered due to a constraint NodesFiltered int + // NodesAvailable is the number of nodes available for evaluation per DC. + NodesAvailable map[string]int + // ClassFiltered is the number of nodes filtered by class ClassFiltered map[string]int diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index edaeaf94c4d..6e76558784c 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -245,11 +245,14 @@ func (s *GenericScheduler) computeJobAllocs() error { // computePlacements computes placements for allocations func (s *GenericScheduler) computePlacements(place []allocTuple) error { // Get the base nodes - nodes, err := readyNodesInDCs(s.state, s.job.Datacenters) + nodes, byDC, err := readyNodesInDCs(s.state, s.job.Datacenters) if err != nil { return err } + // Store the available nodes by datacenter + s.ctx.Metrics().NodesAvailable = byDC + // Update the set of placement ndoes s.stack.SetNodes(nodes) diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go index 80a87fe94f8..4909a0059e4 100644 --- a/scheduler/system_sched.go +++ b/scheduler/system_sched.go @@ -86,10 +86,14 @@ func (s *SystemScheduler) process() (bool, error) { // Get the ready nodes in the required datacenters if s.job != nil { - s.nodes, err = readyNodesInDCs(s.state, s.job.Datacenters) + var byDC map[string]int + s.nodes, byDC, err = readyNodesInDCs(s.state, s.job.Datacenters) if err != nil { return false, fmt.Errorf("failed to get ready nodes: %v", err) } + + // Store the available nodes by datacenter + s.ctx.Metrics().NodesAvailable = byDC } // Create a plan @@ -219,7 +223,7 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error { return fmt.Errorf("could not find node %q", missing.Alloc.NodeID) } - // Update the set of placement ndoes + // Update the set of placement nodes nodes[0] = node s.stack.SetNodes(nodes) diff --git a/scheduler/util.go b/scheduler/util.go index 27a714a0bc8..f448687c5ec 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -172,19 +172,20 @@ func diffSystemAllocs(job *structs.Job, nodes []*structs.Node, taintedNodes map[ return result } -// readyNodesInDCs returns all the ready nodes in the given datacenters -func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, error) { +// readyNodesInDCs returns all the ready nodes in the given datacenters and a +// mapping of each data center to the count of ready nodes. +func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]int, error) { // Index the DCs - dcMap := make(map[string]struct{}, len(dcs)) + dcMap := make(map[string]int, len(dcs)) for _, dc := range dcs { - dcMap[dc] = struct{}{} + dcMap[dc] = 0 } // Scan the nodes var out []*structs.Node iter, err := state.Nodes() if err != nil { - return nil, err + return nil, nil, err } for { raw := iter.Next() @@ -204,8 +205,9 @@ func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, error) { continue } out = append(out, node) + dcMap[node.Datacenter] += 1 } - return out, nil + return out, dcMap, nil } // retryMax is used to retry a callback until it returns success or diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 4d6d21db85f..ee38ac16207 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -204,7 +204,7 @@ func TestReadyNodesInDCs(t *testing.T) { noErr(t, state.UpsertNode(1002, node3)) noErr(t, state.UpsertNode(1003, node4)) - nodes, err := readyNodesInDCs(state, []string{"dc1", "dc2"}) + nodes, dc, err := readyNodesInDCs(state, []string{"dc1", "dc2"}) if err != nil { t.Fatalf("err: %v", err) } @@ -215,6 +215,12 @@ func TestReadyNodesInDCs(t *testing.T) { if nodes[0].ID == node3.ID || nodes[1].ID == node3.ID { t.Fatalf("Bad: %#v", nodes) } + if count, ok := dc["dc1"]; !ok || count != 0 { + t.Fatalf("Bad: dc1 count %v", count) + } + if count, ok := dc["dc2"]; !ok || count != 2 { + t.Fatalf("Bad: dc2 count %v", count) + } } func TestRetryMax(t *testing.T) { From 892c7ddee3214dfff7a483dc5cd2ab0171c680de Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 4 Jan 2016 14:23:06 -0800 Subject: [PATCH 2/3] Fix bug, add tests, and cli output --- api/allocations.go | 1 + command/monitor.go | 8 ++++++++ scheduler/generic_sched.go | 7 ++++--- scheduler/generic_sched_test.go | 5 +++++ scheduler/system_sched.go | 22 +++++++++++----------- scheduler/system_sched_test.go | 5 +++++ 6 files changed, 34 insertions(+), 14 deletions(-) diff --git a/api/allocations.go b/api/allocations.go index 73f600d7e8a..e2885507f9b 100644 --- a/api/allocations.go +++ b/api/allocations.go @@ -62,6 +62,7 @@ type Allocation struct { type AllocationMetric struct { NodesEvaluated int NodesFiltered int + NodesAvailable map[string]int ClassFiltered map[string]int ConstraintFiltered map[string]int NodesExhausted int diff --git a/command/monitor.go b/command/monitor.go index be9b816dc24..5fd6050b182 100644 --- a/command/monitor.go +++ b/command/monitor.go @@ -280,6 +280,14 @@ func dumpAllocStatus(ui cli.Ui, alloc *api.Allocation) { ui.Output(" * No nodes were eligible for evaluation") } + // Print a helpful message if the user has asked for a DC that has no + // available nodes. + for dc, available := range alloc.Metrics.NodesAvailable { + if available == 0 { + ui.Output(fmt.Sprintf(" * No nodes are available in datacenter %q", dc)) + } + } + // Print filter info for class, num := range alloc.Metrics.ClassFiltered { ui.Output(fmt.Sprintf(" * Class %q filtered %d nodes", class, num)) diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 6e76558784c..0f1fb757ae6 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -250,9 +250,6 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error { return err } - // Store the available nodes by datacenter - s.ctx.Metrics().NodesAvailable = byDC - // Update the set of placement ndoes s.stack.SetNodes(nodes) @@ -282,6 +279,9 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error { Metrics: s.ctx.Metrics(), } + // Store the available nodes by datacenter + s.ctx.Metrics().NodesAvailable = byDC + // Set fields based on if we found an allocation option if option != nil { // Generate the service ids for the tasks which this allocation is going @@ -303,5 +303,6 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error { failedTG[missing.TaskGroup] = alloc } } + return nil } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 7af4c4fb2a3..402ae061ff8 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -123,6 +123,11 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) { t.Fatalf("bad: %#v", out[0].Metrics) } + // Check the available nodes + if count, ok := out[0].Metrics.NodesAvailable["dc1"]; !ok || count != 0 { + t.Fatalf("bad: %#v", out[0].Metrics) + } + h.AssertEvalStatus(t, structs.EvalStatusComplete) } diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go index 4909a0059e4..9b352b81fb1 100644 --- a/scheduler/system_sched.go +++ b/scheduler/system_sched.go @@ -25,12 +25,13 @@ type SystemScheduler struct { state State planner Planner - eval *structs.Evaluation - job *structs.Job - plan *structs.Plan - ctx *EvalContext - stack *SystemStack - nodes []*structs.Node + eval *structs.Evaluation + job *structs.Job + plan *structs.Plan + ctx *EvalContext + stack *SystemStack + nodes []*structs.Node + nodesByDC map[string]int limitReached bool nextEval *structs.Evaluation @@ -86,14 +87,10 @@ func (s *SystemScheduler) process() (bool, error) { // Get the ready nodes in the required datacenters if s.job != nil { - var byDC map[string]int - s.nodes, byDC, err = readyNodesInDCs(s.state, s.job.Datacenters) + s.nodes, s.nodesByDC, err = readyNodesInDCs(s.state, s.job.Datacenters) if err != nil { return false, fmt.Errorf("failed to get ready nodes: %v", err) } - - // Store the available nodes by datacenter - s.ctx.Metrics().NodesAvailable = byDC } // Create a plan @@ -250,6 +247,9 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error { Metrics: s.ctx.Metrics(), } + // Store the available nodes by datacenter + s.ctx.Metrics().NodesAvailable = s.nodesByDC + // Set fields based on if we found an allocation option if option != nil { // Generate the service ids for the tasks that this allocation is going diff --git a/scheduler/system_sched_test.go b/scheduler/system_sched_test.go index c6f186ee3f5..fae5f322a76 100644 --- a/scheduler/system_sched_test.go +++ b/scheduler/system_sched_test.go @@ -59,6 +59,11 @@ func TestSystemSched_JobRegister(t *testing.T) { t.Fatalf("bad: %#v", out) } + // Check the available nodes + if count, ok := out[0].Metrics.NodesAvailable["dc1"]; !ok || count != 10 { + t.Fatalf("bad: %#v", out[0].Metrics) + } + h.AssertEvalStatus(t, structs.EvalStatusComplete) } From c51b69a825e0aabd8a20b567b98fa03fd0fea2fa Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 4 Jan 2016 14:33:10 -0800 Subject: [PATCH 3/3] Fix counts --- scheduler/util_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scheduler/util_test.go b/scheduler/util_test.go index ee38ac16207..7b213da77b5 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -215,10 +215,10 @@ func TestReadyNodesInDCs(t *testing.T) { if nodes[0].ID == node3.ID || nodes[1].ID == node3.ID { t.Fatalf("Bad: %#v", nodes) } - if count, ok := dc["dc1"]; !ok || count != 0 { + if count, ok := dc["dc1"]; !ok || count != 1 { t.Fatalf("Bad: dc1 count %v", count) } - if count, ok := dc["dc2"]; !ok || count != 2 { + if count, ok := dc["dc2"]; !ok || count != 1 { t.Fatalf("Bad: dc2 count %v", count) } }