diff --git a/CHANGELOG.md b/CHANGELOG.md index f977cd9a596..dfbad0040f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ IMPROVEMENTS: BUG FIXES: * template: Fixed a bug where dynamic secrets did not trigger the template `change_mode` after a client restart. [[GH-9636](https://github.com/hashicorp/nomad/issues/9636)] + * server: Fixed a bug where new servers may bootstrap prematurely when configured with `bootstrap_expect = 0`. ## 1.0.1 (December 16, 2020) diff --git a/nomad/serf.go b/nomad/serf.go index a955e7b1e77..0e63630bbe3 100644 --- a/nomad/serf.go +++ b/nomad/serf.go @@ -83,7 +83,7 @@ func (s *Server) nodeJoin(me serf.MemberEvent) { s.peerLock.Unlock() // If we still expecting to bootstrap, may need to handle this - if atomic.LoadInt32(&s.config.Bootstrapped) == 0 { + if s.config.BootstrapExpect != 0 && atomic.LoadInt32(&s.config.Bootstrapped) == 0 { s.maybeBootstrap() } } @@ -91,6 +91,12 @@ func (s *Server) nodeJoin(me serf.MemberEvent) { // maybeBootstrap is used to handle bootstrapping when a new server joins func (s *Server) maybeBootstrap() { + + // redundant check to ease testing + if s.config.BootstrapExpect == 0 { + return + } + // Bootstrap can only be done if there are no committed logs, remove our // expectations of bootstrapping. This is slightly cheaper than the full // check that BootstrapCluster will do, so this is a good pre-filter. diff --git a/nomad/serf_test.go b/nomad/serf_test.go index 56181e9b707..de747df0d9e 100644 --- a/nomad/serf_test.go +++ b/nomad/serf_test.go @@ -6,11 +6,13 @@ import ( "os" "path" "strings" + "sync/atomic" "testing" "time" "github.com/hashicorp/nomad/testutil" "github.com/hashicorp/serf/serf" + "github.com/stretchr/testify/require" ) func TestNomad_JoinPeer(t *testing.T) { @@ -442,3 +444,44 @@ func TestNomad_BadExpect(t *testing.T) { t.Fatalf("should have 0 peers: %v", err) }) } + +// TestNomad_NonBootstraping_ShouldntBootstap asserts that if BootstrapExpect is zero, +// the server shouldn't bootstrap +func TestNomad_NonBootstraping_ShouldntBootstap(t *testing.T) { + t.Parallel() + + dir := tmpDir(t) + defer os.RemoveAll(dir) + + s1, cleanupS1 := TestServer(t, func(c *Config) { + c.BootstrapExpect = 0 + c.DevMode = false + c.DataDir = path.Join(dir, "node") + }) + defer cleanupS1() + + testutil.WaitForResult(func() (bool, error) { + s1.peerLock.Lock() + p := len(s1.localPeers) + s1.peerLock.Unlock() + if p != 1 { + return false, fmt.Errorf("%d", p) + } + + return true, nil + }, func(err error) { + t.Fatalf("expected 1 local peer: %v", err) + }) + + // as non-bootstrap mode is the initial state, we must wait long enough to assert that + // we don't bootstrap even if enough time has elapsed. Also, explicitly attempt bootstrap. + s1.maybeBootstrap() + time.Sleep(100 * time.Millisecond) + + bootstrapped := atomic.LoadInt32(&s1.config.Bootstrapped) + require.Zero(t, bootstrapped, "expecting non-bootstrapped servers") + + p, _ := s1.numPeers() + require.Zero(t, p, "number of peers in Raft") + +}