Skip to content

Commit

Permalink
Merge pull request #6792 from hashicorp/b-propose-panic
Browse files Browse the repository at this point in the history
scheduler: fix panic when preempting and evicting allocs
  • Loading branch information
schmichael committed Dec 3, 2019
1 parent 4488559 commit 263f89e
Show file tree
Hide file tree
Showing 8 changed files with 500 additions and 376 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ BUG FIXES:

* core: Ignore `server` config values if `server` is disabled [[GH-6047](https://github.com/hashicorp/nomad/issues/6047)]
* core: Added `semver` constraint for strict Semver 2.0 version comparisons [[GH-6699](https://github.com/hashicorp/nomad/issues/6699)]
* core: Fixed server panic caused by a plan evicting and preempting allocs on a node [[GH-6792](https://github.com/hashicorp/nomad/issues/6792)]
* api: Return a 404 if endpoint not found instead of redirecting to /ui/ [[GH-6658](https://github.com/hashicorp/nomad/issues/6658)]
* api: Decompress web socket response body if gzipped on error responses [[GH-6650](https://github.com/hashicorp/nomad/issues/6650)]
* api: Fixed a bug where some FS/Allocation API endpoints didn't return error messages [[GH-6427](https://github.com/hashicorp/nomad/issues/6427)]
Expand Down
13 changes: 6 additions & 7 deletions scheduler/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ type Context interface {
// Reset is invoked after making a placement
Reset()

// ProposedAllocs returns the proposed allocations for a node
// which is the existing allocations, removing evictions, and
// adding any planned placements.
// ProposedAllocs returns the proposed allocations for a node which are
// the existing allocations, removing evictions, and adding any planned
// placements.
ProposedAllocs(nodeID string) ([]*structs.Allocation, error)

// RegexpCache is a cache of regular expressions
Expand Down Expand Up @@ -120,22 +120,21 @@ func (e *EvalContext) Reset() {
func (e *EvalContext) ProposedAllocs(nodeID string) ([]*structs.Allocation, error) {
// Get the existing allocations that are non-terminal
ws := memdb.NewWatchSet()
existingAlloc, err := e.state.AllocsByNodeTerminal(ws, nodeID, false)
proposed, err := e.state.AllocsByNodeTerminal(ws, nodeID, false)
if err != nil {
return nil, err
}

// Determine the proposed allocation by first removing allocations
// that are planned evictions and adding the new allocations.
proposed := existingAlloc
if update := e.plan.NodeUpdate[nodeID]; len(update) > 0 {
proposed = structs.RemoveAllocs(existingAlloc, update)
proposed = structs.RemoveAllocs(proposed, update)
}

// Remove any allocs that are being preempted
nodePreemptedAllocs := e.plan.NodePreemptions[nodeID]
if len(nodePreemptedAllocs) > 0 {
proposed = structs.RemoveAllocs(existingAlloc, nodePreemptedAllocs)
proposed = structs.RemoveAllocs(proposed, nodePreemptedAllocs)
}

// We create an index of the existing allocations so that if an inplace
Expand Down
116 changes: 113 additions & 3 deletions scheduler/context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ func TestEvalContext_ProposedAlloc(t *testing.T) {
ClientStatus: structs.AllocClientStatusPending,
TaskGroup: "web",
}
noErr(t, state.UpsertJobSummary(998, mock.JobSummary(alloc1.JobID)))
noErr(t, state.UpsertJobSummary(999, mock.JobSummary(alloc2.JobID)))
noErr(t, state.UpsertAllocs(1000, []*structs.Allocation{alloc1, alloc2}))
require.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(alloc1.JobID)))
require.NoError(t, state.UpsertJobSummary(999, mock.JobSummary(alloc2.JobID)))
require.NoError(t, state.UpsertAllocs(1000, []*structs.Allocation{alloc1, alloc2}))

// Add a planned eviction to alloc1
plan := ctx.Plan()
Expand Down Expand Up @@ -149,6 +149,116 @@ func TestEvalContext_ProposedAlloc(t *testing.T) {
}
}

// TestEvalContext_ProposedAlloc_EvictPreempt asserts both Evicted and
// Preempted allocs are removed from the allocs propsed for a node.
//
// See https://github.com/hashicorp/nomad/issues/6787
//
func TestEvalContext_ProposedAlloc_EvictPreempt(t *testing.T) {
t.Parallel()
state, ctx := testContext(t)
nodes := []*RankedNode{
{
Node: &structs.Node{
ID: uuid.Generate(),
NodeResources: &structs.NodeResources{
Cpu: structs.NodeCpuResources{
CpuShares: 1024 * 3,
},
Memory: structs.NodeMemoryResources{
MemoryMB: 1024 * 3,
},
},
},
},
}

// Add existing allocations
j1, j2, j3 := mock.Job(), mock.Job(), mock.Job()
allocEvict := &structs.Allocation{
ID: uuid.Generate(),
Namespace: structs.DefaultNamespace,
EvalID: uuid.Generate(),
NodeID: nodes[0].Node.ID,
JobID: j1.ID,
Job: j1,
AllocatedResources: &structs.AllocatedResources{
Tasks: map[string]*structs.AllocatedTaskResources{
"web": {
Cpu: structs.AllocatedCpuResources{
CpuShares: 1024,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 1024,
},
},
},
},
DesiredStatus: structs.AllocDesiredStatusRun,
ClientStatus: structs.AllocClientStatusPending,
TaskGroup: "web",
}
allocPreempt := &structs.Allocation{
ID: uuid.Generate(),
Namespace: structs.DefaultNamespace,
EvalID: uuid.Generate(),
NodeID: nodes[0].Node.ID,
JobID: j2.ID,
Job: j2,
AllocatedResources: &structs.AllocatedResources{
Tasks: map[string]*structs.AllocatedTaskResources{
"web": {
Cpu: structs.AllocatedCpuResources{
CpuShares: 1024,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 1024,
},
},
},
},
DesiredStatus: structs.AllocDesiredStatusRun,
ClientStatus: structs.AllocClientStatusPending,
TaskGroup: "web",
}
allocPropose := &structs.Allocation{
ID: uuid.Generate(),
Namespace: structs.DefaultNamespace,
EvalID: uuid.Generate(),
NodeID: nodes[0].Node.ID,
JobID: j3.ID,
Job: j3,
AllocatedResources: &structs.AllocatedResources{
Tasks: map[string]*structs.AllocatedTaskResources{
"web": {
Cpu: structs.AllocatedCpuResources{
CpuShares: 1024,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 1024,
},
},
},
},
DesiredStatus: structs.AllocDesiredStatusRun,
ClientStatus: structs.AllocClientStatusPending,
TaskGroup: "web",
}
require.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(allocEvict.JobID)))
require.NoError(t, state.UpsertJobSummary(999, mock.JobSummary(allocPreempt.JobID)))
require.NoError(t, state.UpsertJobSummary(999, mock.JobSummary(allocPropose.JobID)))
require.NoError(t, state.UpsertAllocs(1000, []*structs.Allocation{allocEvict, allocPreempt, allocPropose}))

// Plan to evict one alloc and preempt another
plan := ctx.Plan()
plan.NodePreemptions[nodes[0].Node.ID] = []*structs.Allocation{allocEvict}
plan.NodeUpdate[nodes[0].Node.ID] = []*structs.Allocation{allocPreempt}

proposed, err := ctx.ProposedAllocs(nodes[0].Node.ID)
require.NoError(t, err)
require.Len(t, proposed, 1)
}

func TestEvalEligibility_JobStatus(t *testing.T) {
e := NewEvalEligibility()
cc := "v1:100"
Expand Down
Loading

0 comments on commit 263f89e

Please sign in to comment.