diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index dd72a277a9f..de0ec4ec268 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -231,12 +231,23 @@ func (s *GenericScheduler) process() (bool, error) { // re-placed. func (s *GenericScheduler) filterCompleteAllocs(allocs []*structs.Allocation) []*structs.Allocation { filter := func(a *structs.Allocation) bool { - // Allocs from batch jobs should be filtered when their status is failed - // so that they will be replaced. If they are complete but not failed, they - // shouldn't be replaced. if s.batch { - return a.TerminalStatus() && - a.ClientStatus != structs.AllocClientStatusComplete + // Allocs from batch jobs should be filtered when the desired status + // is terminal or when the client status is failed so that they will + // be replaced. If they are complete but not failed, they shouldn't + // be replaced. + switch a.DesiredStatus { + case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict, structs.AllocDesiredStatusFailed: + return true + default: + } + + switch a.ClientStatus { + case structs.AllocClientStatusFailed: + return true + default: + return false + } } // Filter terminal, non batch allocations diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 14925600d2b..68387334836 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -1079,7 +1079,7 @@ func TestServiceSched_RetryLimit(t *testing.T) { h.AssertEvalStatus(t, structs.EvalStatusFailed) } -func TestBatchSched_Run_DeadAlloc(t *testing.T) { +func TestBatchSched_Run_CompleteAlloc(t *testing.T) { h := NewHarness(t) // Create a node @@ -1091,7 +1091,7 @@ func TestBatchSched_Run_DeadAlloc(t *testing.T) { job.TaskGroups[0].Count = 1 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) - // Create a failed alloc + // Create a complete alloc alloc := mock.Alloc() alloc.Job = job alloc.JobID = job.ID @@ -1131,6 +1131,59 @@ func TestBatchSched_Run_DeadAlloc(t *testing.T) { h.AssertEvalStatus(t, structs.EvalStatusComplete) } +func TestBatchSched_Run_DrainedAlloc(t *testing.T) { + h := NewHarness(t) + + // Create a node + node := mock.Node() + noErr(t, h.State.UpsertNode(h.NextIndex(), node)) + + // Create a job + job := mock.Job() + job.TaskGroups[0].Count = 1 + noErr(t, h.State.UpsertJob(h.NextIndex(), job)) + + // Create a complete alloc + alloc := mock.Alloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = node.ID + alloc.Name = "my-job.web[0]" + alloc.DesiredStatus = structs.AllocDesiredStatusStop + alloc.ClientStatus = structs.AllocClientStatusComplete + noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) + + // Create a mock evaluation to register the job + eval := &structs.Evaluation{ + ID: structs.GenerateUUID(), + Priority: job.Priority, + TriggeredBy: structs.EvalTriggerJobRegister, + JobID: job.ID, + } + + // Process the evaluation + err := h.Process(NewBatchScheduler, eval) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Ensure a plan + if len(h.Plans) != 1 { + t.Fatalf("bad: %#v", h.Plans) + } + + // Lookup the allocations by JobID + out, err := h.State.AllocsByJob(job.ID) + noErr(t, err) + + // Ensure a replacement alloc was placed. + if len(out) != 2 { + t.Fatalf("bad: %#v", out) + } + + h.AssertEvalStatus(t, structs.EvalStatusComplete) +} + func TestBatchSched_Run_FailedAlloc(t *testing.T) { h := NewHarness(t)