diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 53e2d142cf1..4f527018b28 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -339,6 +339,7 @@ func normalizeStoppedAlloc(stoppedAlloc *structs.Allocation, now int64) *structs DesiredDescription: stoppedAlloc.DesiredDescription, ClientStatus: stoppedAlloc.ClientStatus, ModifyTime: now, + FollowupEvalID: stoppedAlloc.FollowupEvalID, } } diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 81cba1edec8..757d84f3bbb 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -5379,6 +5379,9 @@ func (s *StateSnapshot) DenormalizeAllocationDiffSlice(allocDiffs []*structs.All if allocDiff.ClientStatus != "" { allocCopy.ClientStatus = allocDiff.ClientStatus } + if allocDiff.FollowupEvalID != "" { + allocCopy.FollowupEvalID = allocDiff.FollowupEvalID + } } if allocDiff.ModifyTime != 0 { allocCopy.ModifyTime = allocDiff.ModifyTime diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index b671cab5866..676ec041cea 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -9465,7 +9465,7 @@ type Plan struct { // AppendStoppedAlloc marks an allocation to be stopped. The clientStatus of the // allocation may be optionally set by passing in a non-empty value. -func (p *Plan) AppendStoppedAlloc(alloc *Allocation, desiredDesc, clientStatus string) { +func (p *Plan) AppendStoppedAlloc(alloc *Allocation, desiredDesc, clientStatus, followupEvalID string) { newAlloc := new(Allocation) *newAlloc = *alloc @@ -9490,6 +9490,10 @@ func (p *Plan) AppendStoppedAlloc(alloc *Allocation, desiredDesc, clientStatus s newAlloc.AppendState(AllocStateFieldClientStatus, clientStatus) + if followupEvalID != "" { + newAlloc.FollowupEvalID = followupEvalID + } + node := alloc.NodeID existing := p.NodeUpdate[node] p.NodeUpdate[node] = append(existing, newAlloc) @@ -9564,6 +9568,7 @@ func (p *Plan) NormalizeAllocations() { ID: alloc.ID, DesiredDescription: alloc.DesiredDescription, ClientStatus: alloc.ClientStatus, + FollowupEvalID: alloc.FollowupEvalID, } } } diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index a160a3ec916..98cb49c1a4d 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -3563,7 +3563,7 @@ func TestPlan_NormalizeAllocations(t *testing.T) { } stoppedAlloc := MockAlloc() desiredDesc := "Desired desc" - plan.AppendStoppedAlloc(stoppedAlloc, desiredDesc, AllocClientStatusLost) + plan.AppendStoppedAlloc(stoppedAlloc, desiredDesc, AllocClientStatusLost, "followup-eval-id") preemptedAlloc := MockAlloc() preemptingAllocID := uuid.Generate() plan.AppendPreemptedAlloc(preemptedAlloc, preemptingAllocID) @@ -3575,6 +3575,7 @@ func TestPlan_NormalizeAllocations(t *testing.T) { ID: stoppedAlloc.ID, DesiredDescription: desiredDesc, ClientStatus: AllocClientStatusLost, + FollowupEvalID: "followup-eval-id", } assert.Equal(t, expectedStoppedAlloc, actualStoppedAlloc) actualPreemptedAlloc := plan.NodePreemptions[preemptedAlloc.NodeID][0] @@ -3593,7 +3594,7 @@ func TestPlan_AppendStoppedAllocAppendsAllocWithUpdatedAttrs(t *testing.T) { alloc := MockAlloc() desiredDesc := "Desired desc" - plan.AppendStoppedAlloc(alloc, desiredDesc, AllocClientStatusLost) + plan.AppendStoppedAlloc(alloc, desiredDesc, AllocClientStatusLost, "") expectedAlloc := new(Allocation) *expectedAlloc = *alloc diff --git a/nomad/worker_test.go b/nomad/worker_test.go index e8c6261dbb9..e034b96d1e7 100644 --- a/nomad/worker_test.go +++ b/nomad/worker_test.go @@ -437,7 +437,7 @@ func TestWorker_SubmitPlanNormalizedAllocations(t *testing.T) { NodePreemptions: make(map[string][]*structs.Allocation), } desiredDescription := "desired desc" - plan.AppendStoppedAlloc(stoppedAlloc, desiredDescription, structs.AllocClientStatusLost) + plan.AppendStoppedAlloc(stoppedAlloc, desiredDescription, structs.AllocClientStatusLost, "") preemptingAllocID := uuid.Generate() plan.AppendPreemptedAlloc(preemptedAlloc, preemptingAllocID) diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 0e1cbfb07e2..1ec8cfc4d5e 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -378,7 +378,7 @@ func (s *GenericScheduler) computeJobAllocs() error { // Handle the stop for _, stop := range results.stop { - s.plan.AppendStoppedAlloc(stop.alloc, stop.statusDescription, stop.clientStatus) + s.plan.AppendStoppedAlloc(stop.alloc, stop.statusDescription, stop.clientStatus, stop.followupEvalID) } // Handle the in-place updates @@ -476,7 +476,7 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul stopPrevAlloc, stopPrevAllocDesc := missing.StopPreviousAlloc() prevAllocation := missing.PreviousAllocation() if stopPrevAlloc { - s.plan.AppendStoppedAlloc(prevAllocation, stopPrevAllocDesc, "") + s.plan.AppendStoppedAlloc(prevAllocation, stopPrevAllocDesc, "", "") } // Compute penalty nodes for rescheduled allocs diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 45d5debf79d..de40b164731 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -301,6 +301,19 @@ func (a *allocReconciler) markStop(allocs allocSet, clientStatus, statusDescript } } +// markDelayed does markStop, but optionally includes a FollowupEvalID so that we can update +// the stopped alloc with its delayed rescheduling evalID +func (a *allocReconciler) markDelayed(allocs allocSet, clientStatus, statusDescription string, followupEvals map[string]string) { + for _, alloc := range allocs { + a.result.stop = append(a.result.stop, allocStopResult{ + alloc: alloc, + clientStatus: clientStatus, + statusDescription: statusDescription, + followupEvalID: followupEvals[alloc.ID], + }) + } +} + // computeGroup reconciles state for a particular task group. It returns whether // the deployment it is for is complete with regards to the task group. func (a *allocReconciler) computeGroup(group string, all allocSet) bool { @@ -355,7 +368,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { // Find delays for any lost allocs that have stop_after_client_disconnect lostLater := lost.delayByStopAfterClientDisconnect() - a.handleDelayedLost(lostLater, all, tg.Name) + lostLaterEvals := a.handleDelayedLost(lostLater, all, tg.Name) // Create batched follow up evaluations for allocations that are // reschedulable later and mark the allocations for in place updating @@ -368,7 +381,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { // Stop any unneeded allocations and update the untainted set to not // included stopped allocations. canaryState := dstate != nil && dstate.DesiredCanaries != 0 && !dstate.Promoted - stop := a.computeStop(tg, nameIndex, untainted, migrate, lost, canaries, canaryState) + stop := a.computeStop(tg, nameIndex, untainted, migrate, lost, canaries, canaryState, lostLaterEvals) desiredChanges.Stop += uint64(len(stop)) untainted = untainted.difference(stop) @@ -705,13 +718,13 @@ func (a *allocReconciler) computePlacements(group *structs.TaskGroup, // the group definition, the set of allocations in various states and whether we // are canarying. func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *allocNameIndex, - untainted, migrate, lost, canaries allocSet, canaryState bool) allocSet { + untainted, migrate, lost, canaries allocSet, canaryState bool, followupEvals map[string]string) allocSet { // Mark all lost allocations for stop. Previous allocation doesn't matter // here since it is on a lost node var stop allocSet stop = stop.union(lost) - a.markStop(lost, structs.AllocClientStatusLost, allocLost) + a.markDelayed(lost, structs.AllocClientStatusLost, allocLost, followupEvals) // If we are still deploying or creating canaries, don't stop them if canaryState { @@ -836,22 +849,33 @@ func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted all return } -// handleDelayedReschedules creates batched followup evaluations with the WaitUntil field set -// for allocations that are eligible to be rescheduled later +// handleDelayedReschedules creates batched followup evaluations with the WaitUntil field +// set for allocations that are eligible to be rescheduled later, and marks the alloc with +// the followupEvalID func (a *allocReconciler) handleDelayedReschedules(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string) { - a.handleDelayedReschedulesImpl(rescheduleLater, all, tgName, true) -} + // followupEvals are created in the same way as for delayed lost allocs + allocIDToFollowupEvalID := a.handleDelayedLost(rescheduleLater, all, tgName) + + // Initialize the annotations + if len(allocIDToFollowupEvalID) != 0 && a.result.attributeUpdates == nil { + a.result.attributeUpdates = make(map[string]*structs.Allocation) + } -// handleDelayedLost creates batched followup evaluations with the WaitUntil field set for lost allocations -func (a *allocReconciler) handleDelayedLost(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string) { - a.handleDelayedReschedulesImpl(rescheduleLater, all, tgName, false) + // Create updates that will be applied to the allocs to mark the FollowupEvalID + for allocID, evalID := range allocIDToFollowupEvalID { + existingAlloc := all[allocID] + updatedAlloc := existingAlloc.Copy() + updatedAlloc.FollowupEvalID = evalID + a.result.attributeUpdates[updatedAlloc.ID] = updatedAlloc + } } -// handleDelayedReschedulesImpl creates batched followup evaluations with the WaitUntil field set -func (a *allocReconciler) handleDelayedReschedulesImpl(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string, - createUpdates bool) { +// handleDelayedLost creates batched followup evaluations with the WaitUntil field set for +// lost allocations. followupEvals are appended to a.result as a side effect, we return a +// map of alloc IDs to their followupEval IDs +func (a *allocReconciler) handleDelayedLost(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string) map[string]string { if len(rescheduleLater) == 0 { - return + return map[string]string{} } // Sort by time @@ -904,18 +928,5 @@ func (a *allocReconciler) handleDelayedReschedulesImpl(rescheduleLater []*delaye a.result.desiredFollowupEvals[tgName] = evals - // Initialize the annotations - if len(allocIDToFollowupEvalID) != 0 && a.result.attributeUpdates == nil { - a.result.attributeUpdates = make(map[string]*structs.Allocation) - } - - // Create in-place updates for every alloc ID that needs to be updated with its follow up eval ID - if createUpdates { - for allocID, evalID := range allocIDToFollowupEvalID { - existingAlloc := all[allocID] - updatedAlloc := existingAlloc.Copy() - updatedAlloc.FollowupEvalID = evalID - a.result.attributeUpdates[updatedAlloc.ID] = updatedAlloc - } - } + return allocIDToFollowupEvalID } diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index e8da880dbf9..4c50b25c525 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -41,6 +41,7 @@ type allocStopResult struct { alloc *structs.Allocation clientStatus string statusDescription string + followupEvalID string } // allocPlaceResult contains the information required to place a single diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go index 2940c9b5af2..2c179de0567 100644 --- a/scheduler/system_sched.go +++ b/scheduler/system_sched.go @@ -212,18 +212,18 @@ func (s *SystemScheduler) computeJobAllocs() error { // Add all the allocs to stop for _, e := range diff.stop { - s.plan.AppendStoppedAlloc(e.Alloc, allocNotNeeded, "") + s.plan.AppendStoppedAlloc(e.Alloc, allocNotNeeded, "", "") } // Add all the allocs to migrate for _, e := range diff.migrate { - s.plan.AppendStoppedAlloc(e.Alloc, allocNodeTainted, "") + s.plan.AppendStoppedAlloc(e.Alloc, allocNodeTainted, "", "") } // Lost allocations should be transitioned to desired status stop and client // status lost. for _, e := range diff.lost { - s.plan.AppendStoppedAlloc(e.Alloc, allocLost, structs.AllocClientStatusLost) + s.plan.AppendStoppedAlloc(e.Alloc, allocLost, structs.AllocClientStatusLost, "") } // Attempt to do the upgrades in place diff --git a/scheduler/util.go b/scheduler/util.go index 0890f32dfca..713de7ad31d 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -601,7 +601,7 @@ func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job, // the current allocation is discounted when checking for feasibility. // Otherwise we would be trying to fit the tasks current resources and // updated resources. After select is called we can remove the evict. - ctx.Plan().AppendStoppedAlloc(update.Alloc, allocInPlace, "") + ctx.Plan().AppendStoppedAlloc(update.Alloc, allocInPlace, "", "") // Attempt to match the task group option := stack.Select(update.TaskGroup, nil) // This select only looks at one node so we don't pass selectOptions @@ -670,7 +670,7 @@ func evictAndPlace(ctx Context, diff *diffResult, allocs []allocTuple, desc stri n := len(allocs) for i := 0; i < n && i < *limit; i++ { a := allocs[i] - ctx.Plan().AppendStoppedAlloc(a.Alloc, desc, "") + ctx.Plan().AppendStoppedAlloc(a.Alloc, desc, "", "") diff.place = append(diff.place, a) } if n <= *limit { @@ -831,7 +831,7 @@ func updateNonTerminalAllocsToLost(plan *structs.Plan, tainted map[string]*struc alloc.DesiredStatus == structs.AllocDesiredStatusEvict) && (alloc.ClientStatus == structs.AllocClientStatusRunning || alloc.ClientStatus == structs.AllocClientStatusPending) { - plan.AppendStoppedAlloc(alloc, allocLost, structs.AllocClientStatusLost) + plan.AppendStoppedAlloc(alloc, allocLost, structs.AllocClientStatusLost, "") } } } @@ -881,7 +881,7 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy // the current allocation is discounted when checking for feasibility. // Otherwise we would be trying to fit the tasks current resources and // updated resources. After select is called we can remove the evict. - ctx.Plan().AppendStoppedAlloc(existing, allocInPlace, "") + ctx.Plan().AppendStoppedAlloc(existing, allocInPlace, "", "") // Attempt to match the task group option := stack.Select(newTG, nil) // This select only looks at one node so we don't pass selectOptions