Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Teach Nomad servers how to fall back to Consul. #1276

Merged
merged 13 commits into from
Jun 16, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ const (

// datacenterQueryLimit searches through up to this many adjacent
// datacenters looking for the Nomad server service.
datacenterQueryLimit = 5
datacenterQueryLimit = 9

// registerRetryIntv is minimum interval on which we retry
// registration. We pick a value between this and 2x this.
Expand Down Expand Up @@ -1255,8 +1255,8 @@ func (c *Client) setupConsulSyncer() error {
// a new set of servers so it's okay.
nearestDC := dcs[0]
otherDCs := make([]string, 0, len(dcs))
otherDCs = dcs[1:lib.MinInt(len(dcs), datacenterQueryLimit)]
shuffleStrings(otherDCs)
otherDCs = dcs[1:lib.MinInt(len(dcs), datacenterQueryLimit)]

dcs = append([]string{nearestDC}, otherDCs...)
}
Expand All @@ -1272,7 +1272,7 @@ func (c *Client) setupConsulSyncer() error {
var mErr multierror.Error
const defaultMaxNumNomadServers = 8
nomadServerServices := make([]string, 0, defaultMaxNumNomadServers)
c.logger.Printf("[DEBUG] client.consul: bootstrap contacting following Consul DCs: %q", dcs)
c.logger.Printf("[DEBUG] client.consul: bootstrap contacting following Consul DCs: %+q", dcs)
for _, dc := range dcs {
consulOpts := &consulapi.QueryOptions{
AllowStale: true,
Expand Down
7 changes: 4 additions & 3 deletions command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"strconv"
"strings"
"sync"
"sync/atomic"
"time"

"github.com/hashicorp/nomad/client"
Expand Down Expand Up @@ -110,7 +111,7 @@ func (a *Agent) serverConfig() (*nomad.Config, error) {
if a.config.Server.BootstrapExpect == 1 {
conf.Bootstrap = true
} else {
conf.BootstrapExpect = a.config.Server.BootstrapExpect
atomic.StoreInt32(&conf.BootstrapExpect, int32(a.config.Server.BootstrapExpect))
}
}
if a.config.DataDir != "" {
Expand Down Expand Up @@ -235,7 +236,7 @@ func (a *Agent) serverConfig() (*nomad.Config, error) {
return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled")
}

// conf.ConsulConfig = a.config.Consul
conf.ConsulConfig = a.config.Consul

return conf, nil
}
Expand Down Expand Up @@ -379,7 +380,7 @@ func (a *Agent) setupServer() error {
}

// Create the server
server, err := nomad.NewServer(conf)
server, err := nomad.NewServer(conf, a.consulSyncer)
if err != nil {
return fmt.Errorf("server setup failed: %v", err)
}
Expand Down
9 changes: 7 additions & 2 deletions nomad/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

"github.com/hashicorp/memberlist"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/structs/config"
"github.com/hashicorp/nomad/scheduler"
"github.com/hashicorp/raft"
"github.com/hashicorp/serf/serf"
Expand Down Expand Up @@ -51,8 +52,9 @@ type Config struct {

// BootstrapExpect mode is used to automatically bring up a
// collection of Nomad servers. This can be used to automatically
// bring up a collection of nodes.
BootstrapExpect int
// bring up a collection of nodes. All operations on BootstrapExpect
// must be handled via `atomic.*Int32()` calls.
BootstrapExpect int32

// DataDir is the directory to store our state in
DataDir string
Expand Down Expand Up @@ -176,6 +178,9 @@ type Config struct {
// a new leader is elected, since we no longer know the status
// of all the heartbeats.
FailoverHeartbeatTTL time.Duration

// ConsulConfig is this Agent's Consul configuration
ConsulConfig *config.ConsulConfig
}

// CheckVersion is used to check if the ProtocolVersion is valid
Expand Down
18 changes: 11 additions & 7 deletions nomad/serf.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package nomad

import "github.com/hashicorp/serf/serf"
import (
"sync/atomic"

"github.com/hashicorp/serf/serf"
)

const (
// StatusReap is used to update the status of a node if we
Expand Down Expand Up @@ -66,7 +70,7 @@ func (s *Server) nodeJoin(me serf.MemberEvent) {
s.peerLock.Unlock()

// If we still expecting to bootstrap, may need to handle this
if s.config.BootstrapExpect != 0 {
if atomic.LoadInt32(&s.config.BootstrapExpect) != 0 {
s.maybeBootstrap()
}
}
Expand All @@ -91,7 +95,7 @@ func (s *Server) maybeBootstrap() {
// Bootstrap can only be done if there are no committed logs,
// remove our expectations of bootstrapping
if index != 0 {
s.config.BootstrapExpect = 0
atomic.StoreInt32(&s.config.BootstrapExpect, 0)
return
}

Expand All @@ -106,7 +110,7 @@ func (s *Server) maybeBootstrap() {
if p.Region != s.config.Region {
continue
}
if p.Expect != 0 && p.Expect != s.config.BootstrapExpect {
if p.Expect != 0 && p.Expect != int(atomic.LoadInt32(&s.config.BootstrapExpect)) {
s.logger.Printf("[ERR] nomad: peer %v has a conflicting expect value. All nodes should expect the same number.", member)
return
}
Expand All @@ -118,7 +122,7 @@ func (s *Server) maybeBootstrap() {
}

// Skip if we haven't met the minimum expect count
if len(addrs) < s.config.BootstrapExpect {
if len(addrs) < int(atomic.LoadInt32(&s.config.BootstrapExpect)) {
return
}

Expand All @@ -128,8 +132,8 @@ func (s *Server) maybeBootstrap() {
s.logger.Printf("[ERR] nomad: failed to bootstrap peers: %v", err)
}

// Bootstrapping comlete, don't enter this again
s.config.BootstrapExpect = 0
// Bootstrapping complete, don't enter this again
atomic.StoreInt32(&s.config.BootstrapExpect, 0)
}

// nodeFailed is used to handle fail events on the serf cluster
Expand Down
Loading