-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
scheduler: stop allocs in unrelated nodes #11391
Merged
Merged
Changes from 3 commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,7 +65,8 @@ func diffSystemAllocsForNode( | |
job *structs.Job, // job whose allocs are going to be diff-ed | ||
nodeID string, | ||
eligibleNodes map[string]*structs.Node, | ||
taintedNodes map[string]*structs.Node, // nodes which are down or in drain (by node name) | ||
notReadyNodes map[string]struct{}, // nodes that are not ready, e.g. draining | ||
taintedNodes map[string]*structs.Node, // nodes which are down (by node name) | ||
required map[string]*structs.TaskGroup, // set of allocations that must exist | ||
allocs []*structs.Allocation, // non-terminal allocations that exist | ||
terminal structs.TerminalByNodeByName, // latest terminal allocations (by node, name) | ||
|
@@ -139,10 +140,21 @@ func diffSystemAllocsForNode( | |
|
||
// For an existing allocation, if the nodeID is no longer | ||
// eligible, the diff should be ignored | ||
if _, ok := eligibleNodes[nodeID]; !ok { | ||
if _, ok := notReadyNodes[nodeID]; ok { | ||
goto IGNORE | ||
Comment on lines
-142
to
144
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the source of the bug. Previously, if the |
||
} | ||
|
||
// Existing allocations on nodes that are no longer targeted | ||
// should be stopped | ||
if _, ok := eligibleNodes[nodeID]; !ok { | ||
result.stop = append(result.stop, allocTuple{ | ||
Name: name, | ||
TaskGroup: tg, | ||
Alloc: exist, | ||
}) | ||
continue | ||
} | ||
|
||
// If the definition is updated we need to update | ||
if job.JobModifyIndex != exist.Job.JobModifyIndex { | ||
result.update = append(result.update, allocTuple{ | ||
|
@@ -229,7 +241,8 @@ func diffSystemAllocsForNode( | |
// diffResult contain the specific nodeID they should be allocated on. | ||
func diffSystemAllocs( | ||
job *structs.Job, // jobs whose allocations are going to be diff-ed | ||
nodes []*structs.Node, // list of nodes in the ready state | ||
readyNodes []*structs.Node, // list of nodes in the ready state | ||
notReadyNodes map[string]struct{}, // list of nodes in DC but not ready, e.g. draining | ||
taintedNodes map[string]*structs.Node, // nodes which are down or drain mode (by name) | ||
allocs []*structs.Allocation, // non-terminal allocations | ||
terminal structs.TerminalByNodeByName, // latest terminal allocations (by name) | ||
|
@@ -238,12 +251,11 @@ func diffSystemAllocs( | |
// Build a mapping of nodes to all their allocs. | ||
nodeAllocs := make(map[string][]*structs.Allocation, len(allocs)) | ||
for _, alloc := range allocs { | ||
nallocs := append(nodeAllocs[alloc.NodeID], alloc) //nolint:gocritic | ||
nodeAllocs[alloc.NodeID] = nallocs | ||
nodeAllocs[alloc.NodeID] = append(nodeAllocs[alloc.NodeID], alloc) | ||
} | ||
|
||
eligibleNodes := make(map[string]*structs.Node) | ||
for _, node := range nodes { | ||
for _, node := range readyNodes { | ||
if _, ok := nodeAllocs[node.ID]; !ok { | ||
nodeAllocs[node.ID] = nil | ||
} | ||
|
@@ -255,7 +267,7 @@ func diffSystemAllocs( | |
|
||
result := new(diffResult) | ||
for nodeID, allocs := range nodeAllocs { | ||
diff := diffSystemAllocsForNode(job, nodeID, eligibleNodes, taintedNodes, required, allocs, terminal) | ||
diff := diffSystemAllocsForNode(job, nodeID, eligibleNodes, notReadyNodes, taintedNodes, required, allocs, terminal) | ||
result.Append(diff) | ||
} | ||
|
||
|
@@ -264,7 +276,7 @@ func diffSystemAllocs( | |
|
||
// readyNodesInDCs returns all the ready nodes in the given datacenters and a | ||
// mapping of each data center to the count of ready nodes. | ||
func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]int, error) { | ||
func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]struct{}, map[string]int, error) { | ||
// Index the DCs | ||
dcMap := make(map[string]int, len(dcs)) | ||
for _, dc := range dcs { | ||
|
@@ -274,9 +286,10 @@ func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]int | |
// Scan the nodes | ||
ws := memdb.NewWatchSet() | ||
var out []*structs.Node | ||
notReady := map[string]struct{}{} | ||
iter, err := state.Nodes(ws) | ||
if err != nil { | ||
return nil, nil, err | ||
return nil, nil, nil, err | ||
} | ||
for { | ||
raw := iter.Next() | ||
|
@@ -287,6 +300,7 @@ func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]int | |
// Filter on datacenter and status | ||
node := raw.(*structs.Node) | ||
if !node.Ready() { | ||
notReady[node.ID] = struct{}{} | ||
continue | ||
} | ||
if _, ok := dcMap[node.Datacenter]; !ok { | ||
|
@@ -295,7 +309,7 @@ func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]int | |
out = append(out, node) | ||
dcMap[node.Datacenter]++ | ||
} | ||
return out, dcMap, nil | ||
return out, notReady, dcMap, nil | ||
} | ||
|
||
// retryMax is used to retry a callback until it returns success or | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `taintedNodes` logic is actually confusing IMO. The `taintedNodes` function filters the nodes with Down status as well as those marked for draining in `ShouldDrainNode`:
nomad/scheduler/util.go
Lines 351 to 377 in b0ce684
However, nodes that are up but marked for draining were already filtered out by `readyNodesInDCs`:
nomad/scheduler/util.go
Lines 277 to 313 in b0ce684
So `taintedNodes` is only the down nodes. Reasoning through the code is a bit more complex, and I didn't feel confident restructuring that logic to be more explicit about node state.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to add a small nitpick on this: the comment says `(by node name)`, but the code indexes them by ID.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's interesting that the `structs.TerminalByNodeByName` struct is actually grouped by node ID too. I'll update the comment, but rename the struct in a follow-up PR.