Skip to content

Commit

Permalink
quotas: correct addComputedAllocAttrs (#23942)
Browse files Browse the repository at this point in the history
Quota usage calculation depends on the allocation.Resources field (which will be
deprecated in the future), while device resources are kept in
allocation.AllocatedResources and parsed into a structure (vendor/type/name)
so that ranking in the scheduler can find nodes that satisfy device
requirements. To make device quotas work properly, the device information has to
be temporarily translated into allocation.Resources.Devices.

---------

Co-authored-by: Tim Gross <[email protected]>
  • Loading branch information
pkazmierczak and tgross authored Sep 12, 2024
1 parent 0babf01 commit 4ade277
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 0 deletions.
18 changes: 18 additions & 0 deletions nomad/state/state_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,24 @@ func addComputedAllocAttrs(allocs []*structs.Allocation, job *structs.Job) {
alloc.Resources.Add(task)
}

// While we still rely on alloc.Resources field for quotas, we have to add
// device info from AllocatedResources to alloc.Resources
for _, resources := range alloc.AllocatedResources.Tasks {
for _, d := range resources.Devices {
name := d.ID().String()
count := len(d.DeviceIDs)

if count > 0 {
if alloc.Resources.Devices == nil {
alloc.Resources.Devices = make(structs.ResourceDevices, 0)
}
alloc.Resources.Devices = append(
alloc.Resources.Devices, &structs.RequestedDevice{Name: name, Count: uint64(count)},
)
}
}
}

// Add the shared resources
alloc.Resources.Add(alloc.SharedResources)
}
Expand Down
44 changes: 44 additions & 0 deletions nomad/state/state_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,50 @@ func TestStateStore_UpsertPlanResults_DeploymentUpdates(t *testing.T) {
}
}

// TestStateStore_UpsertPlanResults_AllocationResources checks that upserting
// plan results for an allocation whose Resources field is nil, but which
// carries a device in AllocatedResources, yields a stored allocation whose
// Resources.Devices holds the equivalent requested device.
func TestStateStore_UpsertPlanResults_AllocationResources(t *testing.T) {
	ci.Parallel(t)

	// The same device expressed in both forms: the flat requested-device form
	// expected in alloc.Resources, and the structured vendor/type/name form
	// placed in alloc.AllocatedResources.
	dev := &structs.RequestedDevice{Name: "nvidia/gpu/Tesla 60", Count: 1}
	structuredDev := &structs.AllocatedDeviceResource{
		Vendor:    "nvidia",
		Type:      "gpu",
		Name:      "Tesla 60",
		DeviceIDs: []string{"GPU-0668fc92-f8d5-07f6-e3cc-c07d76f466a1"},
	}

	state := testStateStore(t)
	alloc := mock.Alloc()
	job := alloc.Job
	alloc.Job = nil
	// Resources is left nil on purpose; the assertions at the end expect the
	// upsert to have populated it.
	alloc.Resources = nil
	alloc.AllocatedResources.Tasks["web"].Devices = []*structs.AllocatedDeviceResource{structuredDev}

	must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, 999, nil, job))

	eval := mock.Eval()
	eval.JobID = job.ID

	// Create an eval
	must.NoError(t, state.UpsertEvals(structs.MsgTypeTestSetup, 1, []*structs.Evaluation{eval}))

	// Create a plan result
	res := structs.ApplyPlanResultsRequest{
		AllocUpdateRequest: structs.AllocUpdateRequest{
			Alloc: []*structs.Allocation{alloc},
			Job:   job,
		},
		EvalID: eval.ID,
	}

	must.NoError(t, state.UpsertPlanResults(structs.MsgTypeTestSetup, 1000, &res))

	out, err := state.AllocByID(nil, alloc.ID)
	must.NoError(t, err)
	must.Eq(t, alloc, out)

	// Assert on the allocation read back from the state store, and guard the
	// index so a missing device fails the test cleanly instead of panicking.
	must.NotNil(t, out.Resources)
	must.SliceNotEmpty(t, out.Resources.Devices)
	must.Eq(t, dev, out.Resources.Devices[0])
}

func TestStateStore_UpsertDeployment(t *testing.T) {
ci.Parallel(t)

Expand Down

0 comments on commit 4ade277

Please sign in to comment.