Skip to content

Commit

Permalink
Fix preemption panic (#11346)
Browse files Browse the repository at this point in the history
Fix a bug where the scheduler may panic when preemption is enabled. The conditions are a bit complicated:
A job with higher priority that schedule multiple allocations that preempt other multiple allocations on the same node, due to port/network/device assignments.

The cause of the bug is incidental mutation of internal cached data. `RankedNode` computes and cache proposed allocations  in https://github.com/hashicorp/nomad/blob/v1.1.6/scheduler/rank.go#L42-L53 . But scheduler then mutates the list to remove pre-emptable allocs in https://github.com/hashicorp/nomad/blob/v1.1.6/scheduler/rank.go#L293-L294, and  `RemoveAllocs` mutates and sets the tail of cached slice with `nil`s triggering a nil-pointer derefencing case.

I fixed the issue by avoiding the mutation in `RemoveAllocs` - the micro-optimization there doesn't seem necessary.

Fixes #11342
  • Loading branch information
Mahmood Ali authored and lgfa29 committed Nov 15, 2021
1 parent 5ad38bd commit f9e173e
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 10 deletions.
19 changes: 9 additions & 10 deletions nomad/structs/funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,24 +44,23 @@ func warningsFormatter(es []error) string {

// RemoveAllocs is used to remove any allocs with the given IDs
// from the list of allocations
func RemoveAllocs(alloc []*Allocation, remove []*Allocation) []*Allocation {
func RemoveAllocs(allocs []*Allocation, remove []*Allocation) []*Allocation {
if len(remove) == 0 {
return allocs
}
// Convert remove into a set
removeSet := make(map[string]struct{})
for _, remove := range remove {
removeSet[remove.ID] = struct{}{}
}

n := len(alloc)
for i := 0; i < n; i++ {
if _, ok := removeSet[alloc[i].ID]; ok {
alloc[i], alloc[n-1] = alloc[n-1], nil
i--
n--
r := make([]*Allocation, 0, len(allocs))
for _, alloc := range allocs {
if _, ok := removeSet[alloc.ID]; !ok {
r = append(r, alloc)
}
}

alloc = alloc[:n]
return alloc
return r
}

// FilterTerminalAllocs filters out all allocations in a terminal state and
Expand Down
130 changes: 130 additions & 0 deletions scheduler/preemption_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1381,6 +1381,136 @@ func TestPreemption(t *testing.T) {
}
}

// TestPreemptionMultiple tests evicting multiple allocations in the same time
func TestPreemptionMultiple(t *testing.T) {
// The test setup:
// * a node with 4 GPUs
// * a low priority job with 4 allocs, each is using 1 GPU
//
// Then schedule a high priority job needing 2 allocs, using 2 GPUs each.
// Expectation:
// All low priority allocs should preempted to accomodate the high priority job
h := NewHarness(t)

// node with 4 GPUs
node := mock.Node()
node.NodeResources = &structs.NodeResources{
Cpu: structs.NodeCpuResources{
CpuShares: 4000,
},
Memory: structs.NodeMemoryResources{
MemoryMB: 8192,
},
Disk: structs.NodeDiskResources{
DiskMB: 100 * 1024,
},
Networks: []*structs.NetworkResource{
{
Device: "eth0",
CIDR: "192.168.0.100/32",
MBits: 1000,
},
},
Devices: []*structs.NodeDeviceResource{
{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
Attributes: map[string]*psstructs.Attribute{
"memory": psstructs.NewIntAttribute(11, psstructs.UnitGiB),
"cuda_cores": psstructs.NewIntAttribute(3584, ""),
"graphics_clock": psstructs.NewIntAttribute(1480, psstructs.UnitMHz),
"memory_bandwidth": psstructs.NewIntAttribute(11, psstructs.UnitGBPerS),
},
Instances: []*structs.NodeDevice{
{
ID: "dev0",
Healthy: true,
},
{
ID: "dev1",
Healthy: true,
},
{
ID: "dev2",
Healthy: true,
},
{
ID: "dev3",
Healthy: true,
},
},
},
},
}

require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))

// low priority job with 4 allocs using all 4 GPUs
lowPrioJob := mock.Job()
lowPrioJob.Priority = 5
lowPrioJob.TaskGroups[0].Count = 4
lowPrioJob.TaskGroups[0].Networks = nil
lowPrioJob.TaskGroups[0].Tasks[0].Services = nil
lowPrioJob.TaskGroups[0].Tasks[0].Resources.Networks = nil
lowPrioJob.TaskGroups[0].Tasks[0].Resources.Devices = structs.ResourceDevices{{
Name: "gpu",
Count: 1,
}}
require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), lowPrioJob))

allocs := []*structs.Allocation{}
allocIDs := map[string]struct{}{}
for i := 0; i < 4; i++ {
alloc := createAllocWithDevice(uuid.Generate(), lowPrioJob, lowPrioJob.TaskGroups[0].Tasks[0].Resources, &structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{fmt.Sprintf("dev%d", i)},
})
alloc.NodeID = node.ID

allocs = append(allocs, alloc)
allocIDs[alloc.ID] = struct{}{}
}
require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))

// new high priority job with 2 allocs, each using 2 GPUs
highPrioJob := mock.Job()
highPrioJob.Priority = 100
highPrioJob.TaskGroups[0].Count = 2
highPrioJob.TaskGroups[0].Networks = nil
highPrioJob.TaskGroups[0].Tasks[0].Services = nil
highPrioJob.TaskGroups[0].Tasks[0].Resources.Networks = nil
highPrioJob.TaskGroups[0].Tasks[0].Resources.Devices = structs.ResourceDevices{{
Name: "gpu",
Count: 2,
}}
require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), highPrioJob))

// schedule
eval := &structs.Evaluation{
Namespace: structs.DefaultNamespace,
ID: uuid.Generate(),
Priority: highPrioJob.Priority,
TriggeredBy: structs.EvalTriggerJobRegister,
JobID: highPrioJob.ID,
Status: structs.EvalStatusPending,
}
require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))

// Process the evaluation
require.NoError(t, h.Process(NewServiceScheduler, eval))
require.Len(t, h.Plans, 1)
require.Contains(t, h.Plans[0].NodePreemptions, node.ID)

preempted := map[string]struct{}{}
for _, alloc := range h.Plans[0].NodePreemptions[node.ID] {
preempted[alloc.ID] = struct{}{}
}
require.Equal(t, allocIDs, preempted)
}

// helper method to create allocations with given jobs and resources
func createAlloc(id string, job *structs.Job, resource *structs.Resources) *structs.Allocation {
return createAllocInner(id, job, resource, nil, nil)
Expand Down

0 comments on commit f9e173e

Please sign in to comment.