Alloc error message when no nodes are in DC #619

Merged · 3 commits · Jan 5, 2016
1 change: 1 addition & 0 deletions api/allocations.go
@@ -62,6 +62,7 @@ type Allocation struct {
 type AllocationMetric struct {
 	NodesEvaluated     int
 	NodesFiltered      int
+	NodesAvailable     map[string]int
 	ClassFiltered      map[string]int
 	ConstraintFiltered map[string]int
 	NodesExhausted     int
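Neither api.AllocationMetric nor the server-side structs.AllocMetric declares json tags, so the new field serializes under its Go name. A hedged sketch of the metrics fragment an HTTP API client might see for a failed placement, with hypothetical values and the other metric fields omitted:

    "Metrics": {
        "NodesEvaluated": 0,
        "NodesFiltered": 0,
        "NodesAvailable": {
            "dc2": 0
        }
    }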
8 changes: 8 additions & 0 deletions command/monitor.go
@@ -280,6 +280,14 @@ func dumpAllocStatus(ui cli.Ui, alloc *api.Allocation) {
 		ui.Output(" * No nodes were eligible for evaluation")
 	}
 
+	// Print a helpful message if the user has asked for a DC that has no
+	// available nodes.
+	for dc, available := range alloc.Metrics.NodesAvailable {
+		if available == 0 {
+			ui.Output(fmt.Sprintf(" * No nodes are available in datacenter %q", dc))
+		}
+	}
+
 	// Print filter info
 	for class, num := range alloc.Metrics.ClassFiltered {
 		ui.Output(fmt.Sprintf(" * Class %q filtered %d nodes", class, num))
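With this change, an allocation that failed placement because a requested datacenter has no ready nodes says so explicitly in the status dump. A hedged sketch of the relevant output for a job pinned to an empty datacenter, assuming the hypothetical name "dc2" (surrounding status lines omitted; the second line is the one produced by the new loop):

     * No nodes were eligible for evaluation
     * No nodes are available in datacenter "dc2"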
3 changes: 3 additions & 0 deletions nomad/structs/structs.go
@@ -1695,6 +1695,9 @@ type AllocMetric struct {
 	// NodesFiltered is the number of nodes filtered due to a constraint
 	NodesFiltered int
 
+	// NodesAvailable is the number of nodes available for evaluation per DC.
+	NodesAvailable map[string]int
+
 	// ClassFiltered is the number of nodes filtered by class
 	ClassFiltered map[string]int
 
6 changes: 5 additions & 1 deletion scheduler/generic_sched.go
@@ -245,7 +245,7 @@ func (s *GenericScheduler) computeJobAllocs() error {
 // computePlacements computes placements for allocations
 func (s *GenericScheduler) computePlacements(place []allocTuple) error {
 	// Get the base nodes
-	nodes, err := readyNodesInDCs(s.state, s.job.Datacenters)
+	nodes, byDC, err := readyNodesInDCs(s.state, s.job.Datacenters)
 	if err != nil {
 		return err
 	}
@@ -279,6 +279,9 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error {
 			Metrics: s.ctx.Metrics(),
 		}
 
+		// Store the available nodes by datacenter
+		s.ctx.Metrics().NodesAvailable = byDC
+
 		// Set fields based on if we found an allocation option
 		if option != nil {
 			// Generate the service ids for the tasks which this allocation is going
@@ -300,5 +303,6 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error {
 			failedTG[missing.TaskGroup] = alloc
 		}
 	}
+
 	return nil
 }
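The map assigned here is the one built by readyNodesInDCs (see scheduler/util.go below), which seeds an entry for every datacenter the job requested. A hedged illustration of what the metric might hold after evaluating a job that requests dc1 and dc2 when only dc1 has ready nodes (hypothetical values):

// Hypothetical contents of s.ctx.Metrics().NodesAvailable:
byDC := map[string]int{
	"dc1": 3, // ready nodes available for evaluation
	"dc2": 0, // requested but empty; this is what monitor.go reports on
}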
5 changes: 5 additions & 0 deletions scheduler/generic_sched_test.go
@@ -123,6 +123,11 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
 		t.Fatalf("bad: %#v", out[0].Metrics)
 	}
 
+	// Check the available nodes
+	if count, ok := out[0].Metrics.NodesAvailable["dc1"]; !ok || count != 0 {
+		t.Fatalf("bad: %#v", out[0].Metrics)
+	}
+
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }

20 changes: 12 additions & 8 deletions scheduler/system_sched.go
@@ -25,12 +25,13 @@ type SystemScheduler struct {
 	state   State
 	planner Planner
 
-	eval  *structs.Evaluation
-	job   *structs.Job
-	plan  *structs.Plan
-	ctx   *EvalContext
-	stack *SystemStack
-	nodes []*structs.Node
+	eval      *structs.Evaluation
+	job       *structs.Job
+	plan      *structs.Plan
+	ctx       *EvalContext
+	stack     *SystemStack
+	nodes     []*structs.Node
+	nodesByDC map[string]int
 
 	limitReached bool
 	nextEval     *structs.Evaluation
@@ -86,7 +87,7 @@ func (s *SystemScheduler) process() (bool, error) {
 
 	// Get the ready nodes in the required datacenters
 	if s.job != nil {
-		s.nodes, err = readyNodesInDCs(s.state, s.job.Datacenters)
+		s.nodes, s.nodesByDC, err = readyNodesInDCs(s.state, s.job.Datacenters)
 		if err != nil {
 			return false, fmt.Errorf("failed to get ready nodes: %v", err)
 		}
@@ -219,7 +220,7 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
 			return fmt.Errorf("could not find node %q", missing.Alloc.NodeID)
 		}
 
-		// Update the set of placement ndoes
+		// Update the set of placement nodes
 		nodes[0] = node
 		s.stack.SetNodes(nodes)
 
@@ -246,6 +247,9 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
 			Metrics: s.ctx.Metrics(),
 		}
 
+		// Store the available nodes by datacenter
+		s.ctx.Metrics().NodesAvailable = s.nodesByDC
+
 		// Set fields based on if we found an allocation option
 		if option != nil {
 			// Generate the service ids for the tasks that this allocation is going
5 changes: 5 additions & 0 deletions scheduler/system_sched_test.go
@@ -59,6 +59,11 @@ func TestSystemSched_JobRegister(t *testing.T) {
 		t.Fatalf("bad: %#v", out)
 	}
 
+	// Check the available nodes
+	if count, ok := out[0].Metrics.NodesAvailable["dc1"]; !ok || count != 10 {
+		t.Fatalf("bad: %#v", out[0].Metrics)
+	}
+
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }

14 changes: 8 additions & 6 deletions scheduler/util.go
@@ -172,19 +172,20 @@ func diffSystemAllocs(job *structs.Job, nodes []*structs.Node, taintedNodes map[
 	return result
 }
 
-// readyNodesInDCs returns all the ready nodes in the given datacenters
-func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, error) {
+// readyNodesInDCs returns all the ready nodes in the given datacenters and a
+// mapping of each data center to the count of ready nodes.
+func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]int, error) {
 	// Index the DCs
-	dcMap := make(map[string]struct{}, len(dcs))
+	dcMap := make(map[string]int, len(dcs))
 	for _, dc := range dcs {
-		dcMap[dc] = struct{}{}
+		dcMap[dc] = 0
 	}
 
 	// Scan the nodes
 	var out []*structs.Node
 	iter, err := state.Nodes()
 	if err != nil {
-		return nil, err
+		return nil, nil, err
 	}
 	for {
 		raw := iter.Next()
@@ -204,8 +205,9 @@ func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, error) {
 			continue
 		}
 		out = append(out, node)
+		dcMap[node.Datacenter] += 1
 	}
-	return out, nil
+	return out, dcMap, nil
 }
 
 // retryMax is used to retry a callback until it returns success or
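The subtlety worth calling out in readyNodesInDCs is the seeding step: each requested datacenter gets a zero entry before the node scan, which is what makes "DC exists but is empty" distinguishable from "DC never requested". A minimal, self-contained sketch of that pattern with simplified stand-in types (not the actual Nomad code):

package main

import "fmt"

// node is a stand-in for *structs.Node in this sketch; only the field the
// counting logic touches is included.
type node struct {
	Name       string
	Datacenter string
}

// countByDC mirrors the counting pattern used by readyNodesInDCs: every
// requested datacenter is seeded with a zero entry before any node is
// scanned, so a datacenter with no ready nodes still appears in the result.
func countByDC(nodes []node, dcs []string) map[string]int {
	byDC := make(map[string]int, len(dcs))
	for _, dc := range dcs {
		byDC[dc] = 0 // requested, possibly empty
	}
	for _, n := range nodes {
		if _, ok := byDC[n.Datacenter]; !ok {
			continue // node sits in a datacenter the job did not request
		}
		byDC[n.Datacenter]++
	}
	return byDC
}

func main() {
	nodes := []node{{"n1", "dc1"}, {"n2", "dc1"}, {"n3", "dc3"}}
	// dc2 is requested but has no ready nodes; dc3 was never requested.
	fmt.Println(countByDC(nodes, []string{"dc1", "dc2"}))
	// Prints: map[dc1:2 dc2:0]
}

Had the map been populated only as matching nodes were seen, an empty datacenter would be absent from NodesAvailable entirely, and the CLI's "available == 0" check would never fire.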
8 changes: 7 additions & 1 deletion scheduler/util_test.go
@@ -204,7 +204,7 @@ func TestReadyNodesInDCs(t *testing.T) {
 	noErr(t, state.UpsertNode(1002, node3))
 	noErr(t, state.UpsertNode(1003, node4))
 
-	nodes, err := readyNodesInDCs(state, []string{"dc1", "dc2"})
+	nodes, dc, err := readyNodesInDCs(state, []string{"dc1", "dc2"})
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
@@ -215,6 +215,12 @@ func TestReadyNodesInDCs(t *testing.T) {
 	if nodes[0].ID == node3.ID || nodes[1].ID == node3.ID {
 		t.Fatalf("Bad: %#v", nodes)
 	}
+	if count, ok := dc["dc1"]; !ok || count != 1 {
+		t.Fatalf("Bad: dc1 count %v", count)
+	}
+	if count, ok := dc["dc2"]; !ok || count != 1 {
+		t.Fatalf("Bad: dc2 count %v", count)
+	}
 }
 
 func TestRetryMax(t *testing.T) {