diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a3bd4e5709..c38f1fd3dc9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ BUG FIXES: be stored [GH-3372] * core: Fix issue where node-drain with complete batch allocation would create replacement [GH-3217] + * core: Allow batch jobs that have been purged to be rerun without a job + specification change [GH-3375] * core: Fix issue in which batch allocations from previous job versions may not have been stopped properly. [GH-3217] * core: Fix issue in which allocations with the same name during a scale diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index dc18b17acd9..a94e0462ec1 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -500,7 +500,8 @@ func (a *allocReconciler) batchFiltration(all allocSet) (filtered, ignore allocS // Ignore terminal batch jobs from older versions for id, alloc := range filtered { - if alloc.Job.Version < a.job.Version && alloc.TerminalStatus() { + older := alloc.Job.Version < a.job.Version || alloc.Job.CreateIndex < a.job.CreateIndex + if older && alloc.TerminalStatus() { delete(filtered, id) ignored[id] = alloc } diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index c7dcb58e49b..79d8a8b3dab 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -3102,3 +3102,49 @@ func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) { assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place)) assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate)) } + +// Tests that the reconciler handles rerunning a batch job in the case that the +// allocations are from an older instance of the job. 
+func TestReconciler_Batch_Rerun(t *testing.T) {
+	const numAllocs = 10
+
+	// The original registration of the batch job, with its Update field cleared.
+	oldJob := mock.Job()
+	oldJob.TaskGroups[0].Update = nil
+	oldJob.Type = structs.JobTypeBatch
+	groupName := oldJob.TaskGroups[0].Name
+
+	// Every allocation of the original job has already run to completion.
+	finished := make([]*structs.Allocation, numAllocs)
+	for idx := range finished {
+		a := mock.Alloc()
+		a.Job = oldJob
+		a.JobID = oldJob.ID
+		a.TaskGroup = groupName
+		a.Name = structs.AllocName(oldJob.ID, groupName, uint(idx))
+		a.NodeID = uuid.Generate()
+		a.DesiredStatus = structs.AllocDesiredStatusStop
+		a.ClientStatus = structs.AllocClientStatusComplete
+		finished[idx] = a
+	}
+
+	// Same specification, but a later CreateIndex marks it as a newer job.
+	rerun := oldJob.Copy()
+	rerun.CreateIndex++
+
+	results := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true,
+		rerun.ID, rerun, nil, finished, nil).Compute()
+
+	// The finished allocations are ignored and a full new set is placed.
+	want := &resultExpectation{
+		place: numAllocs,
+		desiredTGUpdates: map[string]*structs.DesiredUpdates{
+			groupName: {
+				Place:  numAllocs,
+				Ignore: numAllocs,
+			},
+		},
+	}
+	assertResults(t, results, want)
+	assertNamesHaveIndexes(t, intRange(0, numAllocs-1), placeResultsToNames(results.place))
+}