From fb2e761cda83d847960df140a743537db0827d2d Mon Sep 17 00:00:00 2001 From: Luiz Aoqui Date: Tue, 12 Jul 2022 15:33:12 -0400 Subject: [PATCH] core: use stable time in FSM operation Set the timestamp for a plan apply operation at request time to avoid non-deterministic operations in the FSM. --- nomad/plan_apply.go | 4 +++- nomad/state/state_store.go | 3 +-- nomad/structs/structs.go | 3 +++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 21e4651d25a..57bb0c0873a 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -234,6 +234,8 @@ func (p *planner) snapshotMinIndex(prevPlanResultIndex, planSnapshotIndex uint64 // applyPlan is used to apply the plan result and to return the alloc index func (p *planner) applyPlan(plan *structs.Plan, result *structs.PlanResult, snap *state.StateSnapshot) (raft.ApplyFuture, error) { + now := time.Now().UTC().UnixNano() + // Setup the update request req := structs.ApplyPlanResultsRequest{ AllocUpdateRequest: structs.AllocUpdateRequest{ @@ -243,10 +245,10 @@ func (p *planner) applyPlan(plan *structs.Plan, result *structs.PlanResult, snap DeploymentUpdates: result.DeploymentUpdates, IneligibleNodes: result.IneligibleNodes, EvalID: plan.EvalID, + UpdatedAt: now, } preemptedJobIDs := make(map[structs.NamespacedID]struct{}) - now := time.Now().UTC().UnixNano() if ServersMeetMinimumVersion(p.Members(), MinVersionPlanNormalization, true) { // Initialize the allocs request using the new optimized log entry format. diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 35046b0fb74..41998f81057 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -371,7 +371,6 @@ func (s *StateStore) UpsertPlanResults(msgType structs.MessageType, index uint64 defer txn.Abort() // Mark nodes as ineligible. 
- now := time.Now().Unix() for _, nodeID := range results.IneligibleNodes { s.logger.Warn("marking node as ineligible due to multiple plan rejections, refer to https://www.nomadproject.io/s/port-plan-failure for more information", "node_id", nodeID) @@ -380,7 +379,7 @@ func (s *StateStore) UpsertPlanResults(msgType structs.MessageType, index uint64 SetMessage(NodeEligibilityEventPlanRejectThreshold) err := s.updateNodeEligibilityImpl(index, nodeID, - structs.NodeSchedulingIneligible, now, nodeEvent, txn) + structs.NodeSchedulingIneligible, results.UpdatedAt, nodeEvent, txn) if err != nil { return err } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 179b54a9eed..c8bbbcd5387 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -918,6 +918,9 @@ type ApplyPlanResultsRequest struct { // placements for and should therefore be considered ineligible by workers // to avoid retrying them repeatedly. IneligibleNodes []string + + // UpdatedAt is the server time at which the request was created, in Unix nanoseconds. + UpdatedAt int64 } // AllocUpdateRequest is used to submit changes to allocations, either