Skip to content

Commit

Permalink
core: reschedule evicted batch job when resources become available
Browse files Browse the repository at this point in the history
This PR fixes a bug where an evicted batch job would not be rescheduled
once resources become available.

Closes #9890
  • Loading branch information
shoenig authored and ChaiWithJai committed Jun 3, 2022
1 parent a1cbbac commit 8a0c410
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .changelog/13205.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
core: Fixed a bug where an evicted batch job would not be rescheduled
```
7 changes: 5 additions & 2 deletions scheduler/reconcile_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,8 @@ func (a allocSet) filterByRescheduleable(isBatch, isDisconnecting bool, now time
return
}

// shouldFilter returns whether the alloc should be ignored or considered untainted
// shouldFilter returns whether the alloc should be ignored or considered untainted.
//
// Ignored allocs are filtered out.
// Untainted allocs count against the desired total.
// Filtering logic for batch jobs:
Expand All @@ -418,11 +419,13 @@ func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bo
// complete but not failed, they shouldn't be replaced.
if isBatch {
switch alloc.DesiredStatus {
case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
case structs.AllocDesiredStatusStop:
if alloc.RanSuccessfully() {
return true, false
}
return false, true
case structs.AllocDesiredStatusEvict:
return false, true
default:
}

Expand Down
108 changes: 107 additions & 1 deletion scheduler/reconcile_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package scheduler

import (
"testing"

"time"

"github.com/hashicorp/nomad/ci"
Expand Down Expand Up @@ -826,3 +825,110 @@ func TestAllocSet_filterByTainted(t *testing.T) {
})
}
}

func TestReconcile_shouldFilter(t *testing.T) {
testCases := []struct {
description string
batch bool
failed bool
desiredStatus string
clientStatus string

untainted bool
ignore bool
}{
{
description: "batch running",
batch: true,
failed: false,
desiredStatus: structs.AllocDesiredStatusRun,
clientStatus: structs.AllocClientStatusRunning,
untainted: true,
ignore: false,
},
{
description: "batch stopped success",
batch: true,
failed: false,
desiredStatus: structs.AllocDesiredStatusStop,
clientStatus: structs.AllocClientStatusRunning,
untainted: true,
ignore: false,
},
{
description: "batch stopped failed",
batch: true,
failed: true,
desiredStatus: structs.AllocDesiredStatusStop,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
{
description: "batch evicted",
batch: true,
desiredStatus: structs.AllocDesiredStatusEvict,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
{
description: "batch failed",
batch: true,
desiredStatus: structs.AllocDesiredStatusRun,
clientStatus: structs.AllocClientStatusFailed,
untainted: false,
ignore: false,
},
{
description: "service running",
batch: false,
failed: false,
desiredStatus: structs.AllocDesiredStatusRun,
clientStatus: structs.AllocClientStatusRunning,
untainted: false,
ignore: false,
},
{
description: "service stopped",
batch: false,
failed: false,
desiredStatus: structs.AllocDesiredStatusStop,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
{
description: "service evicted",
batch: false,
failed: false,
desiredStatus: structs.AllocDesiredStatusEvict,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
{
description: "service client complete",
batch: false,
failed: false,
desiredStatus: structs.AllocDesiredStatusRun,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
}

for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
alloc := &structs.Allocation{
DesiredStatus: tc.desiredStatus,
TaskStates: map[string]*structs.TaskState{"task": {State: structs.TaskStateDead, Failed: tc.failed}},
ClientStatus: tc.clientStatus,
}

untainted, ignore := shouldFilter(alloc, tc.batch)
require.Equal(t, tc.untainted, untainted)
require.Equal(t, tc.ignore, ignore)
})
}
}

0 comments on commit 8a0c410

Please sign in to comment.