Skip to content

Commit

Permalink
Merge pull request #13208 from hashicorp/backport/b-batch-preempt2/vi…
Browse files Browse the repository at this point in the history
…rtually-tolerant-marlin

Backport of core: reschedule evicted batch job when resources become available into release/1.1.x
  • Loading branch information
shoenig authored Jun 3, 2022
2 parents 4cd07bb + 98c46be commit 31bf7c3
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .changelog/13205.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
core: Fixed a bug where an evicted batch job would not be rescheduled
```
7 changes: 5 additions & 2 deletions scheduler/reconcile_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,8 @@ func (a allocSet) filterByRescheduleable(isBatch bool, now time.Time, evalID str
return
}

// shouldFilter returns whether the alloc should be ignored or considered untainted
// shouldFilter returns whether the alloc should be ignored or considered untainted.
//
// Ignored allocs are filtered out.
// Untainted allocs count against the desired total.
// Filtering logic for batch jobs:
Expand All @@ -309,11 +310,13 @@ func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bo
// complete but not failed, they shouldn't be replaced.
if isBatch {
switch alloc.DesiredStatus {
case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
case structs.AllocDesiredStatusStop:
if alloc.RanSuccessfully() {
return true, false
}
return false, true
case structs.AllocDesiredStatusEvict:
return false, true
default:
}

Expand Down
109 changes: 108 additions & 1 deletion scheduler/reconcile_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package scheduler

import (
"testing"

"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/mock"
Expand Down Expand Up @@ -134,3 +134,110 @@ func TestAllocSet_filterByTainted(t *testing.T) {
require.Contains(lost, "lost1")
require.Contains(lost, "lost2")
}

func TestReconcile_shouldFilter(t *testing.T) {
testCases := []struct {
description string
batch bool
failed bool
desiredStatus string
clientStatus string

untainted bool
ignore bool
}{
{
description: "batch running",
batch: true,
failed: false,
desiredStatus: structs.AllocDesiredStatusRun,
clientStatus: structs.AllocClientStatusRunning,
untainted: true,
ignore: false,
},
{
description: "batch stopped success",
batch: true,
failed: false,
desiredStatus: structs.AllocDesiredStatusStop,
clientStatus: structs.AllocClientStatusRunning,
untainted: true,
ignore: false,
},
{
description: "batch stopped failed",
batch: true,
failed: true,
desiredStatus: structs.AllocDesiredStatusStop,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
{
description: "batch evicted",
batch: true,
desiredStatus: structs.AllocDesiredStatusEvict,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
{
description: "batch failed",
batch: true,
desiredStatus: structs.AllocDesiredStatusRun,
clientStatus: structs.AllocClientStatusFailed,
untainted: false,
ignore: false,
},
{
description: "service running",
batch: false,
failed: false,
desiredStatus: structs.AllocDesiredStatusRun,
clientStatus: structs.AllocClientStatusRunning,
untainted: false,
ignore: false,
},
{
description: "service stopped",
batch: false,
failed: false,
desiredStatus: structs.AllocDesiredStatusStop,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
{
description: "service evicted",
batch: false,
failed: false,
desiredStatus: structs.AllocDesiredStatusEvict,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
{
description: "service client complete",
batch: false,
failed: false,
desiredStatus: structs.AllocDesiredStatusRun,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
}

for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
alloc := &structs.Allocation{
DesiredStatus: tc.desiredStatus,
TaskStates: map[string]*structs.TaskState{"task": {State: structs.TaskStateDead, Failed: tc.failed}},
ClientStatus: tc.clientStatus,
}

untainted, ignore := shouldFilter(alloc, tc.batch)
require.Equal(t, tc.untainted, untainted)
require.Equal(t, tc.ignore, ignore)
})
}
}

0 comments on commit 31bf7c3

Please sign in to comment.