From 682dbaac5afc222ca1074c94a045d5f7cc2b2108 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Thu, 2 Jun 2022 11:42:15 -0500 Subject: [PATCH] core: reschedule evicted batch job when resources become available This PR fixes a bug where an evicted batch job would not be rescheduled once resources become available. Closes #9890 --- .changelog/13205.txt | 3 + scheduler/reconcile_util.go | 7 +- scheduler/reconcile_util_test.go | 108 ++++++++++++++++++++++++++++++- 3 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 .changelog/13205.txt diff --git a/.changelog/13205.txt b/.changelog/13205.txt new file mode 100644 index 00000000000..9882e74b660 --- /dev/null +++ b/.changelog/13205.txt @@ -0,0 +1,3 @@ +```release-note:bug +core: Fixed a bug where an evicted batch job would not be rescheduled +``` diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 4d0cfb7a882..d0eb0a92a18 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -401,7 +401,8 @@ func (a allocSet) filterByRescheduleable(isBatch, isDisconnecting bool, now time return } -// shouldFilter returns whether the alloc should be ignored or considered untainted +// shouldFilter returns whether the alloc should be ignored or considered untainted. +// // Ignored allocs are filtered out. // Untainted allocs count against the desired total. // Filtering logic for batch jobs: @@ -418,11 +419,13 @@ func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bo // complete but not failed, they shouldn't be replaced. if isBatch { switch alloc.DesiredStatus { - case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: + case structs.AllocDesiredStatusStop: if alloc.RanSuccessfully() { return true, false } return false, true + case structs.AllocDesiredStatusEvict: + return false, true default: } diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go index 12d1c6b09f5..ab386739333 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile_util_test.go @@ -2,7 +2,6 @@ package scheduler import ( "testing" - "time" "github.com/hashicorp/nomad/ci" @@ -826,3 +825,110 @@ func TestAllocSet_filterByTainted(t *testing.T) { }) } } + +func TestReconcile_shouldFilter(t *testing.T) { + testCases := []struct { + description string + batch bool + failed bool + desiredStatus string + clientStatus string + + untainted bool + ignore bool + }{ + { + description: "batch running", + batch: true, + failed: false, + desiredStatus: structs.AllocDesiredStatusRun, + clientStatus: structs.AllocClientStatusRunning, + untainted: true, + ignore: false, + }, + { + description: "batch stopped success", + batch: true, + failed: false, + desiredStatus: structs.AllocDesiredStatusStop, + clientStatus: structs.AllocClientStatusRunning, + untainted: true, + ignore: false, + }, + { + description: "batch stopped failed", + batch: true, + failed: true, + desiredStatus: structs.AllocDesiredStatusStop, + clientStatus: structs.AllocClientStatusComplete, + untainted: false, + ignore: true, + }, + { + description: "batch evicted", + batch: true, + desiredStatus: structs.AllocDesiredStatusEvict, + clientStatus: structs.AllocClientStatusComplete, + untainted: false, + ignore: true, + }, + { + description: "batch failed", + batch: true, + desiredStatus: structs.AllocDesiredStatusRun, + clientStatus: structs.AllocClientStatusFailed, + untainted: false, + ignore: false, + }, + { + description: "service running", + batch: false, + failed: false, + desiredStatus: structs.AllocDesiredStatusRun, + clientStatus: structs.AllocClientStatusRunning, + untainted: false, + ignore: false, + }, + { + description: "service stopped", + batch: false, + failed: false, + desiredStatus: structs.AllocDesiredStatusStop, + clientStatus: structs.AllocClientStatusComplete, + untainted: false, + ignore: true, + }, + { + description: "service evicted", + batch: false, + failed: false, + desiredStatus: structs.AllocDesiredStatusEvict, + clientStatus: structs.AllocClientStatusComplete, + untainted: false, + ignore: true, + }, + { + description: "service client complete", + batch: false, + failed: false, + desiredStatus: structs.AllocDesiredStatusRun, + clientStatus: structs.AllocClientStatusComplete, + untainted: false, + ignore: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + alloc := &structs.Allocation{ + DesiredStatus: tc.desiredStatus, + TaskStates: map[string]*structs.TaskState{"task": {State: structs.TaskStateDead, Failed: tc.failed}}, + ClientStatus: tc.clientStatus, + } + + untainted, ignore := shouldFilter(alloc, tc.batch) + require.Equal(t, tc.untainted, untainted) + require.Equal(t, tc.ignore, ignore) + }) + } +}