From 77e9a4e229d3f70912badb2f82e488a5d010ddcf Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 17 Oct 2022 13:57:48 -0400 Subject: [PATCH 1/3] make version checks specific to region * One-time tokens are not replicated between regions, so we don't want to enforce that the version check across all of serf, just members in the same region. * Cleans up a bunch of legacy checks. This changeset is specific to 1.2.x and the changes for other versions of Nomad will be manually backported in separate PRs --- .changelog/14910.txt | 3 +++ nomad/acl_endpoint.go | 6 +++--- nomad/core_sched.go | 2 +- nomad/job_endpoint.go | 4 ++-- nomad/leader.go | 8 ++++---- nomad/operator_endpoint.go | 4 ++-- nomad/periodic.go | 2 +- nomad/plan_apply.go | 2 +- nomad/util.go | 34 ++++++++---------------------- nomad/util_test.go | 42 ++++++++++++++++++++++++++++++++++++-- nomad/worker.go | 2 +- 11 files changed, 67 insertions(+), 42 deletions(-) create mode 100644 .changelog/14910.txt diff --git a/.changelog/14910.txt b/.changelog/14910.txt new file mode 100644 index 00000000000..bccb174388e --- /dev/null +++ b/.changelog/14910.txt @@ -0,0 +1,3 @@ +```release-note:bug +acl: Fixed a bug where Nomad version checking for one-time tokens was enforced across regions +``` diff --git a/nomad/acl_endpoint.go b/nomad/acl_endpoint.go index 6032a1bce5b..aec4c78d986 100644 --- a/nomad/acl_endpoint.go +++ b/nomad/acl_endpoint.go @@ -851,7 +851,7 @@ func (a *ACL) UpsertOneTimeToken(args *structs.OneTimeTokenUpsertRequest, reply defer metrics.MeasureSince( []string{"nomad", "acl", "upsert_one_time_token"}, time.Now()) - if !ServersMeetMinimumVersion(a.srv.Members(), minOneTimeAuthenticationTokenVersion, false) { + if !ServersMeetMinimumVersion(a.srv.Members(), a.srv.Region(), minOneTimeAuthenticationTokenVersion, false) { return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minAutopilotVersion) } @@ -903,7 +903,7 @@ func (a *ACL) ExchangeOneTimeToken(args *structs.OneTimeTokenExchangeRequest, re defer metrics.MeasureSince( []string{"nomad", "acl", "exchange_one_time_token"}, time.Now()) - if !ServersMeetMinimumVersion(a.srv.Members(), minOneTimeAuthenticationTokenVersion, false) { + if !ServersMeetMinimumVersion(a.srv.Members(), a.srv.Region(), minOneTimeAuthenticationTokenVersion, false) { return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minAutopilotVersion) } @@ -960,7 +960,7 @@ func (a *ACL) ExpireOneTimeTokens(args *structs.OneTimeTokenExpireRequest, reply defer metrics.MeasureSince( []string{"nomad", "acl", "expire_one_time_tokens"}, time.Now()) - if !ServersMeetMinimumVersion(a.srv.Members(), minOneTimeAuthenticationTokenVersion, false) { + if !ServersMeetMinimumVersion(a.srv.Members(), a.srv.Region(), minOneTimeAuthenticationTokenVersion, false) { return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minAutopilotVersion) } diff --git a/nomad/core_sched.go b/nomad/core_sched.go index f6aa3c11281..2b9b3348944 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -505,7 +505,7 @@ OUTER: func (c *CoreScheduler) nodeReap(eval *structs.Evaluation, nodeIDs []string) error { // For old clusters, send single deregistration messages COMPAT(0.11) minVersionBatchNodeDeregister := version.Must(version.NewVersion("0.9.4")) - if !ServersMeetMinimumVersion(c.srv.Members(), minVersionBatchNodeDeregister, true) { + if !ServersMeetMinimumVersion(c.srv.Members(), c.srv.Region(), minVersionBatchNodeDeregister, true) { for _, id := range nodeIDs { req := structs.NodeDeregisterRequest{ NodeID: id, diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 3af8e654570..f2670657736 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -396,7 +396,7 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis // COMPAT(1.1.0): Remove the ServerMeetMinimumVersion check to always set args.Eval // 0.12.1 introduced atomic eval job registration - if eval != nil && ServersMeetMinimumVersion(j.srv.Members(), minJobRegisterAtomicEvalVersion, false) { + if eval != nil && ServersMeetMinimumVersion(j.srv.Members(), j.srv.Region(), minJobRegisterAtomicEvalVersion, false) { args.Eval = eval submittedEval = true } @@ -881,7 +881,7 @@ func (j *Job) Deregister(args *structs.JobDeregisterRequest, reply *structs.JobD } // COMPAT(1.1.0): remove conditional and always set args.Eval - if ServersMeetMinimumVersion(j.srv.Members(), minJobRegisterAtomicEvalVersion, false) { + if ServersMeetMinimumVersion(j.srv.Members(), j.srv.Region(), minJobRegisterAtomicEvalVersion, false) { args.Eval = eval } diff --git a/nomad/leader.go b/nomad/leader.go index ecffabc85c7..ddb174bc012 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -749,7 +749,7 @@ func (s *Server) schedulePeriodic(stopCh chan struct{}) { s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobCSIVolumeClaimGC, index)) } case <-oneTimeTokenGC.C: - if !ServersMeetMinimumVersion(s.Members(), minOneTimeAuthenticationTokenVersion, false) { + if !ServersMeetMinimumVersion(s.Members(), s.Region(), minOneTimeAuthenticationTokenVersion, false) { continue } @@ -1578,7 +1578,7 @@ func (s *Server) getOrCreateAutopilotConfig() *structs.AutopilotConfig { return config } - if !ServersMeetMinimumVersion(s.Members(), minAutopilotVersion, false) { + if !ServersMeetMinimumVersion(s.Members(), AllRegions, minAutopilotVersion, false) { s.logger.Named("autopilot").Warn("can't initialize until all servers are above minimum version", "min_version", minAutopilotVersion) return nil } @@ -1605,7 +1605,7 @@ func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration { if config != nil { return config } - if !ServersMeetMinimumVersion(s.Members(), minSchedulerConfigVersion, false) { + if !ServersMeetMinimumVersion(s.Members(), s.Region(), minSchedulerConfigVersion, false) { s.logger.Named("core").Warn("can't initialize scheduler config until all servers are above minimum version", "min_version", minSchedulerConfigVersion) return nil } @@ -1620,7 +1620,7 @@ func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration { } func (s *Server) generateClusterID() (string, error) { - if !ServersMeetMinimumVersion(s.Members(), minClusterIDVersion, false) { + if !ServersMeetMinimumVersion(s.Members(), AllRegions, minClusterIDVersion, false) { s.logger.Named("core").Warn("cannot initialize cluster ID until all servers are above minimum version", "min_version", minClusterIDVersion) return "", errors.Errorf("cluster ID cannot be created until all servers are above minimum version %s", minClusterIDVersion) } diff --git a/nomad/operator_endpoint.go b/nomad/operator_endpoint.go index aab11c539ae..077a5df93da 100644 --- a/nomad/operator_endpoint.go +++ b/nomad/operator_endpoint.go @@ -248,7 +248,7 @@ func (op *Operator) AutopilotSetConfiguration(args *structs.AutopilotSetConfigRe } // All servers should be at or above 0.8.0 to apply this operatation - if !ServersMeetMinimumVersion(op.srv.Members(), minAutopilotVersion, false) { + if !ServersMeetMinimumVersion(op.srv.Members(), op.srv.Region(), minAutopilotVersion, false) { return fmt.Errorf("All servers should be running version %v to update autopilot config", minAutopilotVersion) } @@ -316,7 +316,7 @@ func (op *Operator) SchedulerSetConfiguration(args *structs.SchedulerSetConfigRe } // All servers should be at or above 0.9.0 to apply this operation - if !ServersMeetMinimumVersion(op.srv.Members(), minSchedulerConfigVersion, false) { + if !ServersMeetMinimumVersion(op.srv.Members(), op.srv.Region(), minSchedulerConfigVersion, false) { return fmt.Errorf("All servers should be running version %v to update scheduler config", minSchedulerConfigVersion) } diff --git a/nomad/periodic.go b/nomad/periodic.go index c4f40066f63..5b7aa5fdddd 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -80,7 +80,7 @@ func (s *Server) DispatchJob(job *structs.Job) (*structs.Evaluation, error) { eval.ModifyIndex = index // COMPAT(1.1): Remove in 1.1.0 - 0.12.1 introduced atomic eval job registration - if !ServersMeetMinimumVersion(s.Members(), minJobRegisterAtomicEvalVersion, false) { + if !ServersMeetMinimumVersion(s.Members(), s.Region(), minJobRegisterAtomicEvalVersion, false) { // Create a new evaluation eval.JobModifyIndex = index update := &structs.EvalUpdateRequest{ diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index eb790f88c71..209cd7598e9 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -219,7 +219,7 @@ func (p *planner) applyPlan(plan *structs.Plan, result *structs.PlanResult, snap preemptedJobIDs := make(map[structs.NamespacedID]struct{}) now := time.Now().UTC().UnixNano() - if ServersMeetMinimumVersion(p.Members(), MinVersionPlanNormalization, true) { + if ServersMeetMinimumVersion(p.Members(), p.Region(), MinVersionPlanNormalization, true) { // Initialize the allocs request using the new optimized log entry format. // Determine the minimum number of updates, could be more if there // are multiple updates per node diff --git a/nomad/util.go b/nomad/util.go index 210a202d959..55a4e1eea67 100644 --- a/nomad/util.go +++ b/nomad/util.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/serf/serf" + "golang.org/x/exp/slices" ) // MinVersionPlanNormalization is the minimum version to support the @@ -149,15 +150,20 @@ func isNomadServer(m serf.Member) (bool, *serverParts) { return true, parts } +const AllRegions = "" + // ServersMeetMinimumVersion returns whether the Nomad servers are at least on the // given Nomad version. The checkFailedServers parameter specifies whether version // for the failed servers should be verified. -func ServersMeetMinimumVersion(members []serf.Member, minVersion *version.Version, checkFailedServers bool) bool { +func ServersMeetMinimumVersion(members []serf.Member, region string, minVersion *version.Version, checkFailedServers bool) bool { for _, member := range members { - if valid, parts := isNomadServer(member); valid && (parts.Status == serf.StatusAlive || (checkFailedServers && parts.Status == serf.StatusFailed)) { + valid, parts := isNomadServer(member) + if valid && + (parts.Region == region || region == AllRegions) && + (parts.Status == serf.StatusAlive || (checkFailedServers && parts.Status == serf.StatusFailed)) { // Check if the versions match - version.LessThan will return true for // 0.8.0-rc1 < 0.8.0, so we want to ignore the metadata - versionsMatch := slicesMatch(minVersion.Segments(), parts.Build.Segments()) + versionsMatch := slices.Equal(minVersion.Segments(), parts.Build.Segments()) if parts.Build.LessThan(minVersion) && !versionsMatch { return false } @@ -167,28 +173,6 @@ func ServersMeetMinimumVersion(members []serf.Member, minVersion *version.Versio return true } -func slicesMatch(a, b []int) bool { - if a == nil && b == nil { - return true - } - - if a == nil || b == nil { - return false - } - - if len(a) != len(b) { - return false - } - - for i := range a { - if a[i] != b[i] { - return false - } - } - - return true -} - // shuffleStrings randomly shuffles the list of strings func shuffleStrings(list []string) { for i := range list { diff --git a/nomad/util_test.go b/nomad/util_test.go index f9d304f5b3d..96923287fdb 100644 --- a/nomad/util_test.go +++ b/nomad/util_test.go @@ -150,7 +150,7 @@ func TestServersMeetMinimumVersionExcludingFailed(t *testing.T) { } for _, tc := range cases { - result := ServersMeetMinimumVersion(tc.members, tc.ver, false) + result := ServersMeetMinimumVersion(tc.members, AllRegions, tc.ver, false) if result != tc.expected { t.Fatalf("bad: %v, %v, %v", result, tc.ver.String(), tc) } @@ -188,7 +188,45 @@ func TestServersMeetMinimumVersionIncludingFailed(t *testing.T) { } for _, tc := range cases { - result := ServersMeetMinimumVersion(tc.members, tc.ver, true) + result := ServersMeetMinimumVersion(tc.members, AllRegions, tc.ver, true) + if result != tc.expected { + t.Fatalf("bad: %v, %v, %v", result, tc.ver.String(), tc) + } + } +} + +func TestServersMeetMinimumVersionSuffix(t *testing.T) { + t.Parallel() + + cases := []struct { + members []serf.Member + ver *version.Version + expected bool + }{ + // Multiple servers, meets req version + { + members: []serf.Member{ + makeMember("1.3.0", serf.StatusAlive), + makeMember("1.2.6", serf.StatusAlive), + makeMember("1.2.6-dev", serf.StatusFailed), + }, + ver: version.Must(version.NewVersion("1.2.6-dev")), + expected: true, + }, + // Multiple servers, doesn't meet req version + { + members: []serf.Member{ + makeMember("1.1.18", serf.StatusAlive), + makeMember("1.2.6-dev", serf.StatusAlive), + makeMember("1.0.11", serf.StatusFailed), + }, + ver: version.Must(version.NewVersion("1.2.6-dev")), + expected: false, + }, + } + + for _, tc := range cases { + result := ServersMeetMinimumVersion(tc.members, AllRegions, tc.ver, true) if result != tc.expected { t.Fatalf("bad: %v, %v, %v", result, tc.ver.String(), tc) } diff --git a/nomad/worker.go b/nomad/worker.go index 5c116e724c2..9c5433413e8 100644 --- a/nomad/worker.go +++ b/nomad/worker.go @@ -598,7 +598,7 @@ func (w *Worker) SubmitPlan(plan *structs.Plan) (*structs.PlanResult, scheduler. plan.SnapshotIndex = w.snapshotIndex // Normalize stopped and preempted allocs before RPC - normalizePlan := ServersMeetMinimumVersion(w.srv.Members(), MinVersionPlanNormalization, true) + normalizePlan := ServersMeetMinimumVersion(w.srv.Members(), w.srv.Region(), MinVersionPlanNormalization, true) if normalizePlan { plan.NormalizeAllocations() } From 47bd249f1a223a91b82e7c4191a72f9140354d7c Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 17 Oct 2022 15:58:56 -0400 Subject: [PATCH 2/3] fixup --- nomad/acl_endpoint.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nomad/acl_endpoint.go b/nomad/acl_endpoint.go index aec4c78d986..ca571973a48 100644 --- a/nomad/acl_endpoint.go +++ b/nomad/acl_endpoint.go @@ -852,7 +852,7 @@ func (a *ACL) UpsertOneTimeToken(args *structs.OneTimeTokenUpsertRequest, reply []string{"nomad", "acl", "upsert_one_time_token"}, time.Now()) if !ServersMeetMinimumVersion(a.srv.Members(), a.srv.Region(), minOneTimeAuthenticationTokenVersion, false) { - return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minAutopilotVersion) + return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minOneTimeAuthenticationTokenVersion) } // Snapshot the state @@ -904,7 +904,7 @@ func (a *ACL) ExchangeOneTimeToken(args *structs.OneTimeTokenExchangeRequest, re []string{"nomad", "acl", "exchange_one_time_token"}, time.Now()) if !ServersMeetMinimumVersion(a.srv.Members(), a.srv.Region(), minOneTimeAuthenticationTokenVersion, false) { - return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minAutopilotVersion) + return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minOneTimeAuthenticationTokenVersion) } // Snapshot the state @@ -961,7 +961,7 @@ func (a *ACL) ExpireOneTimeTokens(args *structs.OneTimeTokenExpireRequest, reply []string{"nomad", "acl", "expire_one_time_tokens"}, time.Now()) if !ServersMeetMinimumVersion(a.srv.Members(), a.srv.Region(), minOneTimeAuthenticationTokenVersion, false) { - return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minAutopilotVersion) + return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minOneTimeAuthenticationTokenVersion) } // Check management level permissions From 36fe860bd805215b279a8c826eacec354e0e7816 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 17 Oct 2022 16:06:21 -0400 Subject: [PATCH 3/3] remove cherry-picked test --- nomad/util_test.go | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/nomad/util_test.go b/nomad/util_test.go index 96923287fdb..335f6811945 100644 --- a/nomad/util_test.go +++ b/nomad/util_test.go @@ -195,44 +195,6 @@ func TestServersMeetMinimumVersionIncludingFailed(t *testing.T) { } } -func TestServersMeetMinimumVersionSuffix(t *testing.T) { - t.Parallel() - - cases := []struct { - members []serf.Member - ver *version.Version - expected bool - }{ - // Multiple servers, meets req version - { - members: []serf.Member{ - makeMember("1.3.0", serf.StatusAlive), - makeMember("1.2.6", serf.StatusAlive), - makeMember("1.2.6-dev", serf.StatusFailed), - }, - ver: version.Must(version.NewVersion("1.2.6-dev")), - expected: true, - }, - // Multiple servers, doesn't meet req version - { - members: []serf.Member{ - makeMember("1.1.18", serf.StatusAlive), - makeMember("1.2.6-dev", serf.StatusAlive), - makeMember("1.0.11", serf.StatusFailed), - }, - ver: version.Must(version.NewVersion("1.2.6-dev")), - expected: false, - }, - } - - for _, tc := range cases { - result := ServersMeetMinimumVersion(tc.members, AllRegions, tc.ver, true) - if result != tc.expected { - t.Fatalf("bad: %v, %v, %v", result, tc.ver.String(), tc) - } - } -} - func makeMember(version string, status serf.MemberStatus) serf.Member { return serf.Member{ Name: "foo",