From 1a854c444e98a1b03b0dd73353565a1ea90e2b1b Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Fri, 11 May 2018 15:52:05 -0400 Subject: [PATCH 01/21] add server join info to server and client --- client/client.go | 12 +-- client/client_test.go | 6 +- command/agent/agent_endpoint.go | 2 +- command/agent/command.go | 48 +++++++++-- command/agent/config-test-fixtures/basic.hcl | 14 +++ command/agent/config.go | 40 +++++++++ command/agent/config_parse.go | 72 +++++++++++++++- command/agent/config_parse_test.go | 12 +++ command/agent/retry_join.go | 43 +++++++--- command/agent/retry_join_test.go | 90 +++++++++++++------- 10 files changed, 274 insertions(+), 65 deletions(-) diff --git a/client/client.go b/client/client.go index 0ef9cda9f40..0cc2d81112b 100644 --- a/client/client.go +++ b/client/client.go @@ -283,7 +283,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic // Set the preconfigured list of static servers c.configLock.RLock() if len(c.configCopy.Servers) > 0 { - if err := c.setServersImpl(c.configCopy.Servers, true); err != nil { + if _, err := c.setServersImpl(c.configCopy.Servers, true); err != nil { logger.Printf("[WARN] client: None of the configured servers are valid: %v", err) } } @@ -623,7 +623,7 @@ func (c *Client) GetServers() []string { // SetServers sets a new list of nomad servers to connect to. As long as one // server is resolvable no error is returned. -func (c *Client) SetServers(in []string) error { +func (c *Client) SetServers(in []string) (int, error) { return c.setServersImpl(in, false) } @@ -633,7 +633,7 @@ func (c *Client) SetServers(in []string) error { // // Force should be used when setting the servers from the initial configuration // since the server may be starting up in parallel and initial pings may fail. 
-func (c *Client) setServersImpl(in []string, force bool) error { +func (c *Client) setServersImpl(in []string, force bool) (int, error) { var mu sync.Mutex var wg sync.WaitGroup var merr multierror.Error @@ -673,13 +673,13 @@ func (c *Client) setServersImpl(in []string, force bool) error { // Only return errors if no servers are valid if len(endpoints) == 0 { if len(merr.Errors) > 0 { - return merr.ErrorOrNil() + return 0, merr.ErrorOrNil() } - return noServersErr + return 0, noServersErr } c.servers.SetServers(endpoints) - return nil + return len(endpoints), nil } // restoreState is used to restore our state from the data dir diff --git a/client/client_test.go b/client/client_test.go index 7f86b70ba62..f697972d735 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -975,13 +975,13 @@ func TestClient_ServerList(t *testing.T) { if s := client.GetServers(); len(s) != 0 { t.Fatalf("expected server lit to be empty but found: %+q", s) } - if err := client.SetServers(nil); err != noServersErr { + if _, err := client.SetServers(nil); err != noServersErr { t.Fatalf("expected setting an empty list to return a 'no servers' error but received %v", err) } - if err := client.SetServers([]string{"123.456.13123.123.13:80"}); err == nil { + if _, err := client.SetServers([]string{"123.456.13123.123.13:80"}); err == nil { t.Fatalf("expected setting a bad server to return an error") } - if err := client.SetServers([]string{"123.456.13123.123.13:80", "127.0.0.1:1234", "127.0.0.1"}); err == nil { + if _, err := client.SetServers([]string{"123.456.13123.123.13:80", "127.0.0.1:1234", "127.0.0.1"}); err == nil { t.Fatalf("expected setting at least one good server to succeed but received: %v", err) } s := client.GetServers() diff --git a/command/agent/agent_endpoint.go b/command/agent/agent_endpoint.go index 54300177529..ade1e0ffe06 100644 --- a/command/agent/agent_endpoint.go +++ b/command/agent/agent_endpoint.go @@ -222,7 +222,7 @@ func (s *HTTPServer) updateServers(resp 
http.ResponseWriter, req *http.Request) // Set the servers list into the client s.agent.logger.Printf("[TRACE] Adding servers %+q to the client's primary server list", servers) - if err := client.SetServers(servers); err != nil { + if _, err := client.SetServers(servers); err != nil { s.agent.logger.Printf("[ERR] Attempt to add servers %q to client failed: %v", servers, err) //TODO is this the right error to return? return nil, CodedError(400, err.Error()) diff --git a/command/agent/command.go b/command/agent/command.go index f456c3aeb5a..5c30115ff64 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -549,13 +549,49 @@ func (c *Command) Run(args []string) int { // Start retry join process c.retryJoinErrCh = make(chan struct{}) - joiner := retryJoiner{ - join: c.agent.server.Join, - discover: &discover.Discover{}, - errCh: c.retryJoinErrCh, - logger: c.agent.logger, + if config.Server.Enabled && len(config.Server.RetryJoin) != 0 { + joiner := retryJoiner{ + discover: &discover.Discover{}, + errCh: c.retryJoinErrCh, + logger: c.agent.logger, + serverJoin: c.agent.server.Join, + serverEnabled: true, + } + + // This is for backwards compatibility, this should be removed in Nomad + // 0.10 and only ServerJoin should be declared on the server + serverJoinInfo := &ServerJoin{ + RetryJoin: config.Server.RetryJoin, + StartJoin: config.Server.StartJoin, + RetryMaxAttempts: config.Server.RetryMaxAttempts, + RetryInterval: config.Server.RetryInterval, + } + go joiner.RetryJoin(serverJoinInfo) + } + + if config.Server.Enabled && config.Server.ServerJoin != nil { + joiner := retryJoiner{ + discover: &discover.Discover{}, + errCh: c.retryJoinErrCh, + logger: c.agent.logger, + serverJoin: c.agent.server.Join, + serverEnabled: true, + } + + go joiner.RetryJoin(config.Server.ServerJoin) + } + + if config.Client.Enabled && config.Client.ServerJoin != nil { + joiner := retryJoiner{ + discover: &discover.Discover{}, + errCh: c.retryJoinErrCh, + logger: 
c.agent.logger, + clientJoin: c.agent.client.SetServers, + clientEnabled: true, + } + + go joiner.RetryJoin(config.Client.ServerJoin) } - go joiner.RetryJoin(config) // Wait for exit return c.handleSignals() diff --git a/command/agent/config-test-fixtures/basic.hcl b/command/agent/config-test-fixtures/basic.hcl index 7398dff43ca..4598235918b 100644 --- a/command/agent/config-test-fixtures/basic.hcl +++ b/command/agent/config-test-fixtures/basic.hcl @@ -19,6 +19,7 @@ advertise { rpc = "127.0.0.3" serf = "127.0.0.4" } + client { enabled = true state_dir = "/tmp/client-state" @@ -29,6 +30,13 @@ client { foo = "bar" baz = "zip" } + server_join_info { + retry_join = [ "1.1.1.1", "2.2.2.2" ] + start_join = [ "1.1.1.1", "2.2.2.2" ] + retry_max = 3 + retry_interval = "15s" + } + options { foo = "bar" baz = "zip" @@ -86,6 +94,12 @@ server { redundancy_zone = "foo" upgrade_version = "0.8.0" encrypt = "abc" + server_join_info { + retry_join = [ "1.1.1.1", "2.2.2.2" ] + start_join = [ "1.1.1.1", "2.2.2.2" ] + retry_max = 3 + retry_interval = "15s" + } } acl { enabled = true diff --git a/command/agent/config.go b/command/agent/config.go index 929d3d74b11..8f76ae50678 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -217,6 +217,9 @@ type ClientConfig struct { // NoHostUUID disables using the host's UUID and will force generation of a // random UUID. NoHostUUID *bool `mapstructure:"no_host_uuid"` + + // ServerJoin contains information that is used to attempt to join servers + ServerJoin *ServerJoin `mapstructure:"server_join_info"` } // ACLConfig is configuration specific to the ACL system @@ -311,19 +314,23 @@ type ServerConfig struct { // StartJoin is a list of addresses to attempt to join when the // agent starts. If Serf is unable to communicate with any of these // addresses, then the agent will error and exit. + // Deprecated in Nomad 0.10 StartJoin []string `mapstructure:"start_join"` // RetryJoin is a list of addresses to join with retry enabled. 
+ // Deprecated in Nomad 0.10 RetryJoin []string `mapstructure:"retry_join"` // RetryMaxAttempts specifies the maximum number of times to retry joining a // host on startup. This is useful for cases where we know the node will be // online eventually. + // Deprecated in Nomad 0.10 RetryMaxAttempts int `mapstructure:"retry_max"` // RetryInterval specifies the amount of time to wait in between join // attempts on agent start. The minimum allowed value is 1 second and // the default is 30s. + // Deprecated in Nomad 0.10 RetryInterval string `mapstructure:"retry_interval"` retryInterval time.Duration `mapstructure:"-"` @@ -346,6 +353,32 @@ type ServerConfig struct { // Encryption key to use for the Serf communication EncryptKey string `mapstructure:"encrypt" json:"-"` + + // ServerJoin contains information that is used to attempt to join servers + ServerJoin *ServerJoin `mapstructure:"server_join_info"` +} + +// ServerJoin is used in both clients and servers to bootstrap connections to +// servers +type ServerJoin struct { + // StartJoin is a list of addresses to attempt to join when the + // agent starts. If Serf is unable to communicate with any of these + // addresses, then the agent will error and exit. + StartJoin []string `mapstructure:"start_join"` + + // RetryJoin is a list of addresses to join with retry enabled. + RetryJoin []string `mapstructure:"retry_join"` + + // RetryMaxAttempts specifies the maximum number of times to retry joining a + // host on startup. This is useful for cases where we know the node will be + // online eventually. + RetryMaxAttempts int `mapstructure:"retry_max"` + + // RetryInterval specifies the amount of time to wait in between join + // attempts on agent start. The minimum allowed value is 1 second and + // the default is 30s. + RetryInterval string `mapstructure:"retry_interval"` + retryInterval time.Duration `mapstructure:"-"` } // EncryptBytes returns the encryption key configured. 
@@ -1055,6 +1088,9 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { if b.EncryptKey != "" { result.EncryptKey = b.EncryptKey } + if b.ServerJoin != nil { + result.ServerJoin = b.ServerJoin + } // Add the schedulers result.EnabledSchedulers = append(result.EnabledSchedulers, b.EnabledSchedulers...) @@ -1162,6 +1198,10 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig { result.ChrootEnv[k] = v } + if b.ServerJoin != nil { + result.ServerJoin = b.ServerJoin + } + return &result } diff --git a/command/agent/config_parse.go b/command/agent/config_parse.go index 9d82050cbcb..37408c4e365 100644 --- a/command/agent/config_parse.go +++ b/command/agent/config_parse.go @@ -370,6 +370,7 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error { "gc_parallel_destroys", "gc_max_allocs", "no_host_uuid", + "server_join_info", } if err := helper.CheckHCLKeys(listVal, valid); err != nil { return err @@ -385,6 +386,7 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error { delete(m, "chroot_env") delete(m, "reserved") delete(m, "stats") + delete(m, "server_join_info") var config ClientConfig dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ @@ -448,6 +450,13 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error { } } + // Parse ServerJoin config + if o := listVal.Filter("server_join_info"); len(o.Items) > 0 { + if err := parseServerJoin(&config.ServerJoin, o); err != nil { + return multierror.Prefix(err, "server_join_info->") + } + } + *result = &config return nil } @@ -531,16 +540,20 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error { "heartbeat_grace", "min_heartbeat_ttl", "max_heartbeats_per_second", - "start_join", - "retry_join", - "retry_max", - "retry_interval", "rejoin_after_leave", "encrypt", "authoritative_region", "non_voting_server", "redundancy_zone", "upgrade_version", + + "server_join_info", + + // For backwards compatibility + "start_join", + "retry_join", + 
"retry_max", + "retry_interval", } if err := helper.CheckHCLKeys(listVal, valid); err != nil { return err @@ -551,6 +564,8 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error { return err } + delete(m, "server_join_info") + var config ServerConfig dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ DecodeHook: mapstructure.StringToTimeDurationHookFunc(), @@ -570,10 +585,59 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error { } } + // Parse ServerJoin config + if o := listVal.Filter("server_join_info"); len(o.Items) > 0 { + if err := parseServerJoin(&config.ServerJoin, o); err != nil { + return multierror.Prefix(err, "server_join_info->") + } + } + *result = &config return nil } +func parseServerJoin(result **ServerJoin, list *ast.ObjectList) error { + list = list.Elem() + if len(list.Items) > 1 { + return fmt.Errorf("only one 'server_info_join' block allowed") + } + + // Get our object + listVal := list.Items[0].Val + + // Check for invalid keys + valid := []string{ + "start_join", + "retry_join", + "retry_max", + "retry_interval", + } + if err := helper.CheckHCLKeys(listVal, valid); err != nil { + return err + } + + var m map[string]interface{} + if err := hcl.DecodeObject(&m, listVal); err != nil { + return err + } + + var serverJoinInfo ServerJoin + dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ + DecodeHook: mapstructure.StringToTimeDurationHookFunc(), + WeaklyTypedInput: true, + Result: &serverJoinInfo, + }) + if err != nil { + return err + } + if err := dec.Decode(m); err != nil { + return err + } + + *result = &serverJoinInfo + return nil +} + func parseACL(result **ACLConfig, list *ast.ObjectList) error { list = list.Elem() if len(list.Items) > 1 { diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index 994ed1d2a6d..d5316b9950b 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -47,6 +47,12 @@ func 
TestConfig_Parse(t *testing.T) { AllocDir: "/tmp/alloc", Servers: []string{"a.b.c:80", "127.0.0.1:1234"}, NodeClass: "linux-medium-64bit", + ServerJoin: &ServerJoin{ + RetryJoin: []string{"1.1.1.1", "2.2.2.2"}, + StartJoin: []string{"1.1.1.1", "2.2.2.2"}, + RetryInterval: "15s", + RetryMaxAttempts: 3, + }, Meta: map[string]string{ "foo": "bar", "baz": "zip", @@ -106,6 +112,12 @@ func TestConfig_Parse(t *testing.T) { RedundancyZone: "foo", UpgradeVersion: "0.8.0", EncryptKey: "abc", + ServerJoin: &ServerJoin{ + RetryJoin: []string{"1.1.1.1", "2.2.2.2"}, + StartJoin: []string{"1.1.1.1", "2.2.2.2"}, + RetryInterval: "15s", + RetryMaxAttempts: 3, + }, }, ACL: &ACLConfig{ Enabled: true, diff --git a/command/agent/retry_join.go b/command/agent/retry_join.go index 1a0aea8581d..0215ca98ee4 100644 --- a/command/agent/retry_join.go +++ b/command/agent/retry_join.go @@ -27,8 +27,17 @@ type DiscoverInterface interface { // retryJoiner is used to handle retrying a join until it succeeds or all of // its tries are exhausted. type retryJoiner struct { - // join adds the specified servers to the serf cluster - join func([]string) (int, error) + // serverJoin adds the specified servers to the serf cluster + serverJoin func([]string) (int, error) + + // serverEnabled indicates whether the nomad agent will run in server mode + serverEnabled bool + + // clientJoin adds the specified servers to the serf cluster + clientJoin func([]string) (int, error) + + // clientEnabled indicates whether the nomad agent will run in client mode + clientEnabled bool // discover is of type Discover, where this is either the go-discover // implementation or a mock used for testing @@ -44,21 +53,21 @@ type retryJoiner struct { // retryJoin is used to handle retrying a join until it succeeds or all retries // are exhausted. 
-func (r *retryJoiner) RetryJoin(config *Config) { - if len(config.Server.RetryJoin) == 0 || !config.Server.Enabled { +func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) { + if len(serverJoin.RetryJoin) == 0 { return } attempt := 0 - addrsToJoin := strings.Join(config.Server.RetryJoin, " ") + addrsToJoin := strings.Join(serverJoin.RetryJoin, " ") r.logger.Printf("[INFO] agent: Joining cluster... %s", addrsToJoin) for { var addrs []string var err error - for _, addr := range config.Server.RetryJoin { + for _, addr := range serverJoin.RetryJoin { switch { case strings.HasPrefix(addr, "provider="): servers, err := r.discover.Addrs(addr, r.logger) @@ -73,14 +82,24 @@ func (r *retryJoiner) RetryJoin(config *Config) { } if len(addrs) > 0 { - n, err := r.join(addrs) - if err == nil { - r.logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n) + if r.serverEnabled && r.serverJoin != nil { + n, err := r.serverJoin(addrs) + if err == nil { + r.logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n) + return + } + } + if r.clientEnabled && r.clientJoin != nil { + n, err := r.clientJoin(addrs) + if err == nil { + r.logger.Printf("[INFO] agent: Join completed. 
Synced with %d initial agents", n) + return + } } } attempt++ - if config.Server.RetryMaxAttempts > 0 && attempt > config.Server.RetryMaxAttempts { + if serverJoin.RetryMaxAttempts > 0 && attempt > serverJoin.RetryMaxAttempts { r.logger.Printf("[ERR] agent: max join retry exhausted, exiting") close(r.errCh) return @@ -88,8 +107,8 @@ func (r *retryJoiner) RetryJoin(config *Config) { if err != nil { r.logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err, - config.Server.RetryInterval) + serverJoin.RetryInterval) } - time.Sleep(config.Server.retryInterval) + time.Sleep(serverJoin.retryInterval) } } diff --git a/command/agent/retry_join_test.go b/command/agent/retry_join_test.go index 34b381373ab..7948360dcc7 100644 --- a/command/agent/retry_join_test.go +++ b/command/agent/retry_join_test.go @@ -82,12 +82,9 @@ func TestRetryJoin_NonCloud(t *testing.T) { t.Parallel() require := require.New(t) - newConfig := &Config{ - Server: &ServerConfig{ - RetryMaxAttempts: 1, - RetryJoin: []string{"127.0.0.1"}, - Enabled: true, - }, + serverJoin := &ServerJoin{ + RetryMaxAttempts: 1, + RetryJoin: []string{"127.0.0.1"}, } var output []string @@ -98,13 +95,14 @@ func TestRetryJoin_NonCloud(t *testing.T) { } joiner := retryJoiner{ - discover: &MockDiscover{}, - join: mockJoin, - logger: log.New(ioutil.Discard, "", 0), - errCh: make(chan struct{}), + discover: &MockDiscover{}, + serverJoin: mockJoin, + serverEnabled: true, + logger: log.New(ioutil.Discard, "", 0), + errCh: make(chan struct{}), } - joiner.RetryJoin(newConfig) + joiner.RetryJoin(serverJoin) require.Equal(1, len(output)) require.Equal(stubAddress, output[0]) @@ -114,12 +112,9 @@ func TestRetryJoin_Cloud(t *testing.T) { t.Parallel() require := require.New(t) - newConfig := &Config{ - Server: &ServerConfig{ - RetryMaxAttempts: 1, - RetryJoin: []string{"provider=aws, tag_value=foo"}, - Enabled: true, - }, + serverJoin := &ServerJoin{ + RetryMaxAttempts: 1, + RetryJoin: []string{"provider=aws, tag_value=foo"}, } 
var output []string @@ -131,13 +126,14 @@ func TestRetryJoin_Cloud(t *testing.T) { mockDiscover := &MockDiscover{} joiner := retryJoiner{ - discover: mockDiscover, - join: mockJoin, - logger: log.New(ioutil.Discard, "", 0), - errCh: make(chan struct{}), + discover: mockDiscover, + serverJoin: mockJoin, + serverEnabled: true, + logger: log.New(ioutil.Discard, "", 0), + errCh: make(chan struct{}), } - joiner.RetryJoin(newConfig) + joiner.RetryJoin(serverJoin) require.Equal(1, len(output)) require.Equal("provider=aws, tag_value=foo", mockDiscover.ReceivedAddrs) @@ -148,12 +144,9 @@ func TestRetryJoin_MixedProvider(t *testing.T) { t.Parallel() require := require.New(t) - newConfig := &Config{ - Server: &ServerConfig{ - RetryMaxAttempts: 1, - RetryJoin: []string{"provider=aws, tag_value=foo", "127.0.0.1"}, - Enabled: true, - }, + serverJoin := &ServerJoin{ + RetryMaxAttempts: 1, + RetryJoin: []string{"provider=aws, tag_value=foo", "127.0.0.1"}, } var output []string @@ -165,15 +158,46 @@ func TestRetryJoin_MixedProvider(t *testing.T) { mockDiscover := &MockDiscover{} joiner := retryJoiner{ - discover: mockDiscover, - join: mockJoin, - logger: log.New(ioutil.Discard, "", 0), - errCh: make(chan struct{}), + discover: mockDiscover, + serverJoin: mockJoin, + serverEnabled: true, + logger: log.New(ioutil.Discard, "", 0), + errCh: make(chan struct{}), } - joiner.RetryJoin(newConfig) + joiner.RetryJoin(serverJoin) require.Equal(2, len(output)) require.Equal("provider=aws, tag_value=foo", mockDiscover.ReceivedAddrs) require.Equal(stubAddress, output[0]) } + +func TestRetryJoin_Client(t *testing.T) { + t.Parallel() + require := require.New(t) + + serverJoin := &ServerJoin{ + RetryMaxAttempts: 1, + RetryJoin: []string{"127.0.0.1"}, + } + + var output []string + + mockJoin := func(s []string) (int, error) { + output = s + return 0, nil + } + + joiner := retryJoiner{ + discover: &MockDiscover{}, + clientJoin: mockJoin, + clientEnabled: true, + logger: log.New(ioutil.Discard, "", 
0), + errCh: make(chan struct{}), + } + + joiner.RetryJoin(serverJoin) + + require.Equal(1, len(output)) + require.Equal(stubAddress, output[0]) +} From f68a0405cb8a1c850b9bf2300b9e4079c34b33ef Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Mon, 21 May 2018 13:17:35 -0400 Subject: [PATCH 02/21] add documentation --- .../docs/agent/configuration/client.html.md | 16 ++++++++++++++++ .../docs/agent/configuration/server.html.md | 17 ++++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/website/source/docs/agent/configuration/client.html.md b/website/source/docs/agent/configuration/client.html.md index 2f96da892a0..f3e9ab0366c 100644 --- a/website/source/docs/agent/configuration/client.html.md +++ b/website/source/docs/agent/configuration/client.html.md @@ -90,6 +90,22 @@ client { receive work. This may be specified as an IP address or DNS, with or without the port. If the port is omitted, the default port of `4647` is used. +- `server_join` `(map[string]string` - Specifies the list of server information + to retry joining. The fields contained are: + + - `retry_join` `(array: [])` - Specifies a list of server + addresses to retry joining if the first attempt fails. The list of + addresses will be tried in the order specified, until one + succeeds. After one succeeds, no further addresses will be contacted. This is + useful for cases where we know the address will become available eventually. + + - `retry_interval` `(string: "30s")` - Specifies the time to wait between retry + join attempts. + + - `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be + made before exiting with a return code of 1. By default, this is set to 0 + which is interpreted as infinite retries. + - `state_dir` `(string: "[data_dir]/client")` - Specifies the directory to use to store client state. 
By default, this is - the top-level [data_dir](/docs/agent/configuration/index.html#data_dir) suffixed with diff --git a/website/source/docs/agent/configuration/server.html.md b/website/source/docs/agent/configuration/server.html.md index 56b5e4abcd6..92e62f4920b 100644 --- a/website/source/docs/agent/configuration/server.html.md +++ b/website/source/docs/agent/configuration/server.html.md @@ -102,9 +102,9 @@ server { second is a tradeoff as it lowers failure detection time of nodes at the tradeoff of false positives and increased load on the leader. -- `non_voting_server` `(bool: false)` - (Enterprise-only) Specifies whether - this server will act as a non-voting member of the cluster to help provide - read scalability. +- `non_voting_server` `(bool: false)` - (Enterprise-only) Specifies whether + this server will act as a non-voting member of the cluster to help provide + read scalability. - `num_schedulers` `(int: [num-cores])` - Specifies the number of parallel scheduler threads to run. This can be as many as one per core, or `0` to @@ -131,8 +131,8 @@ server { cluster again when starting. This flag allows the previous state to be used to rejoin the cluster. -- `retry_join` `(array: [])` - Specifies a list of server addresses to - retry joining if the first attempt fails. This is similar to +- `retry_join` `(array: [])` - Specifies a list of server + addresses to retry joining if the first attempt fails. This is similar to [`start_join`](#start_join), but only invokes if the initial join attempt fails. The list of addresses will be tried in the order specified, until one succeeds. After one succeeds, no further addresses will be contacted. This is @@ -148,6 +148,13 @@ server { made before exiting with a return code of 1. By default, this is set to 0 which is interpreted as infinite retries. +- `server_join` `(map[string]string` - Specifies the list of server information + to retry joining. 
The fields contained are [retry_join](#retry_join), + [retry_interval](#retry_interval), [retry_max](#retry_max), and + [start_join](#start_join). These fields will only be able to be specified in + the `server_join` stanza after Nomad 0.10 and are deprecated as top-level + configuration on the server stanza. + - `start_join` `(array: [])` - Specifies a list of server addresses to join on startup. If Nomad is unable to join with any of the specified addresses, agent startup will fail. See the From 02b89ae0f4878b34c6f04b79c1dced2ddae77fd0 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Mon, 21 May 2018 18:41:28 -0400 Subject: [PATCH 03/21] update server_join naming and improve logging --- command/agent/command.go | 3 +-- command/agent/config-test-fixtures/basic.hcl | 5 ++--- command/agent/config.go | 4 ++-- command/agent/config_parse.go | 18 +++++++++--------- command/agent/config_parse_test.go | 1 - command/agent/retry_join.go | 4 ++-- command/agent/retry_join_test.go | 6 +++--- 7 files changed, 19 insertions(+), 22 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index 5c30115ff64..902d89c595a 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -558,8 +558,7 @@ func (c *Command) Run(args []string) int { serverEnabled: true, } - // This is for backwards compatibility, this should be removed in Nomad - // 0.10 and only ServerJoin should be declared on the server + // COMPAT: Remove in 0.10 and only use ServerJoin serverJoinInfo := &ServerJoin{ RetryJoin: config.Server.RetryJoin, StartJoin: config.Server.StartJoin, diff --git a/command/agent/config-test-fixtures/basic.hcl b/command/agent/config-test-fixtures/basic.hcl index 4598235918b..264cf819696 100644 --- a/command/agent/config-test-fixtures/basic.hcl +++ b/command/agent/config-test-fixtures/basic.hcl @@ -30,9 +30,8 @@ client { foo = "bar" baz = "zip" } - server_join_info { + server_join { retry_join = [ "1.1.1.1", "2.2.2.2" ] - start_join = [
"1.1.1.1", "2.2.2.2" ] retry_max = 3 retry_interval = "15s" } @@ -94,7 +93,7 @@ server { redundancy_zone = "foo" upgrade_version = "0.8.0" encrypt = "abc" - server_join_info { + server_join { retry_join = [ "1.1.1.1", "2.2.2.2" ] start_join = [ "1.1.1.1", "2.2.2.2" ] retry_max = 3 diff --git a/command/agent/config.go b/command/agent/config.go index 8f76ae50678..c744e550368 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -219,7 +219,7 @@ type ClientConfig struct { NoHostUUID *bool `mapstructure:"no_host_uuid"` // ServerJoin contains information that is used to attempt to join servers - ServerJoin *ServerJoin `mapstructure:"server_join_info"` + ServerJoin *ServerJoin `mapstructure:"server_join"` } // ACLConfig is configuration specific to the ACL system @@ -355,7 +355,7 @@ type ServerConfig struct { EncryptKey string `mapstructure:"encrypt" json:"-"` // ServerJoin contains information that is used to attempt to join servers - ServerJoin *ServerJoin `mapstructure:"server_join_info"` + ServerJoin *ServerJoin `mapstructure:"server_join"` } // ServerJoin is used in both clients and servers to bootstrap connections to diff --git a/command/agent/config_parse.go b/command/agent/config_parse.go index 37408c4e365..66f704f6ca0 100644 --- a/command/agent/config_parse.go +++ b/command/agent/config_parse.go @@ -370,7 +370,7 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error { "gc_parallel_destroys", "gc_max_allocs", "no_host_uuid", - "server_join_info", + "server_join", } if err := helper.CheckHCLKeys(listVal, valid); err != nil { return err @@ -386,7 +386,7 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error { delete(m, "chroot_env") delete(m, "reserved") delete(m, "stats") - delete(m, "server_join_info") + delete(m, "server_join") var config ClientConfig dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ @@ -451,9 +451,9 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error { } // Parse 
ServerJoin config - if o := listVal.Filter("server_join_info"); len(o.Items) > 0 { + if o := listVal.Filter("server_join"); len(o.Items) > 0 { if err := parseServerJoin(&config.ServerJoin, o); err != nil { - return multierror.Prefix(err, "server_join_info->") + return multierror.Prefix(err, "server_join->") } } @@ -547,7 +547,7 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error { "redundancy_zone", "upgrade_version", - "server_join_info", + "server_join", // For backwards compatibility "start_join", @@ -564,7 +564,7 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error { return err } - delete(m, "server_join_info") + delete(m, "server_join") var config ServerConfig dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ @@ -586,9 +586,9 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error { } // Parse ServerJoin config - if o := listVal.Filter("server_join_info"); len(o.Items) > 0 { + if o := listVal.Filter("server_join"); len(o.Items) > 0 { if err := parseServerJoin(&config.ServerJoin, o); err != nil { - return multierror.Prefix(err, "server_join_info->") + return multierror.Prefix(err, "server_join->") } } @@ -599,7 +599,7 @@ func parseServer(result **ServerConfig, list *ast.ObjectList) error { func parseServerJoin(result **ServerJoin, list *ast.ObjectList) error { list = list.Elem() if len(list.Items) > 1 { - return fmt.Errorf("only one 'server_info_join' block allowed") + return fmt.Errorf("only one 'server_join' block allowed") } // Get our object diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index d5316b9950b..95387b6a8c0 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -49,7 +49,6 @@ func TestConfig_Parse(t *testing.T) { NodeClass: "linux-medium-64bit", ServerJoin: &ServerJoin{ RetryJoin: []string{"1.1.1.1", "2.2.2.2"}, - StartJoin: []string{"1.1.1.1", "2.2.2.2"}, RetryInterval: "15s", RetryMaxAttempts: 3, }, diff 
--git a/command/agent/retry_join.go b/command/agent/retry_join.go index 0215ca98ee4..eb7329b4c97 100644 --- a/command/agent/retry_join.go +++ b/command/agent/retry_join.go @@ -85,14 +85,14 @@ func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) { if r.serverEnabled && r.serverJoin != nil { n, err := r.serverJoin(addrs) if err == nil { - r.logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n) + r.logger.Printf("[INFO] agent: Join completed. Server synced with %d initial servers", n) return } } if r.clientEnabled && r.clientJoin != nil { n, err := r.clientJoin(addrs) if err == nil { - r.logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n) + r.logger.Printf("[INFO] agent: Join completed. Client synced with %d initial servers", n) return } } diff --git a/command/agent/retry_join_test.go b/command/agent/retry_join_test.go index 7948360dcc7..dbd7b0abab7 100644 --- a/command/agent/retry_join_test.go +++ b/command/agent/retry_join_test.go @@ -78,7 +78,7 @@ func TestRetryJoin_Integration(t *testing.T) { }) } -func TestRetryJoin_NonCloud(t *testing.T) { +func TestRetryJoin_Server_NonCloud(t *testing.T) { t.Parallel() require := require.New(t) @@ -108,7 +108,7 @@ func TestRetryJoin_NonCloud(t *testing.T) { require.Equal(stubAddress, output[0]) } -func TestRetryJoin_Cloud(t *testing.T) { +func TestRetryJoin_Server_Cloud(t *testing.T) { t.Parallel() require := require.New(t) @@ -140,7 +140,7 @@ func TestRetryJoin_Cloud(t *testing.T) { require.Equal(stubAddress, output[0]) } -func TestRetryJoin_MixedProvider(t *testing.T) { +func TestRetryJoin_Server_MixedProvider(t *testing.T) { t.Parallel() require := require.New(t) From 0770f03031d67b9b5e2c79dac118c1e52f279b09 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Tue, 22 May 2018 14:14:41 -0400 Subject: [PATCH 04/21] update documentation for server_join --- command/agent/command.go | 5 + command/agent/retry_join.go | 34 ++++ command/agent/retry_join_test.go | 149 
++++++++++++++++++ .../docs/agent/configuration/client.html.md | 17 +- .../docs/agent/configuration/server.html.md | 97 ++---------- 5 files changed, 200 insertions(+), 102 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index 902d89c595a..dc33ef16a63 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -558,6 +558,11 @@ func (c *Command) Run(args []string) int { serverEnabled: true, } + if err := joiner.Validate(config); err != nil { + c.Ui.Error(err.Error()) + return 1 + } + // COMPAT: Remove in 0.10 and only use ServerJoin serverJoinInfo := &ServerJoin{ RetryJoin: config.Server.RetryJoin, diff --git a/command/agent/retry_join.go b/command/agent/retry_join.go index eb7329b4c97..567185944da 100644 --- a/command/agent/retry_join.go +++ b/command/agent/retry_join.go @@ -1,6 +1,7 @@ package agent import ( + "fmt" "log" "strings" "time" @@ -51,6 +52,39 @@ type retryJoiner struct { logger *log.Logger } +// Validate ensures that the configuration passes validity checks for the +// retry_join stanza. If the configuration is not valid, returns an error that +// will be displayed to the operator, otherwise nil. 
+func (r *retryJoiner) Validate(config *Config) error {
+
+	// If server_join is defined for the server, ensure that none of the
+	// deprecated top-level join fields are set alongside it.
+	if config.Server != nil && config.Server.ServerJoin != nil {
+		if len(config.Server.RetryJoin) != 0 {
+			return fmt.Errorf("server_join and retry_join cannot both be defined; try defining only server_join")
+		}
+		if len(config.Server.StartJoin) != 0 {
+			return fmt.Errorf("server_join and start_join cannot both be defined; try defining only server_join")
+		}
+		if config.Server.RetryMaxAttempts != 0 {
+			return fmt.Errorf("server_join and retry_max cannot both be defined; try defining only server_join")
+		}
+		if config.Server.RetryInterval != "0" {
+			return fmt.Errorf("server_join and retry_interval cannot both be defined; try defining only server_join")
+		}
+	}
+
+	// If server_join is defined for the client, ensure that start_join is
+	// not set (an empty list counts as unset, matching the server checks).
+	if config.Client != nil && config.Client.ServerJoin != nil {
+		if len(config.Client.ServerJoin.StartJoin) != 0 {
+			return fmt.Errorf("start_join is not supported for Nomad clients")
+		}
+	}
+
+	return nil
+}
+
 // retryJoin is used to handle retrying a join until it succeeds or all retries
 // are exhausted.
func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) { diff --git a/command/agent/retry_join_test.go b/command/agent/retry_join_test.go index dbd7b0abab7..98afe0cfbbf 100644 --- a/command/agent/retry_join_test.go +++ b/command/agent/retry_join_test.go @@ -201,3 +201,152 @@ func TestRetryJoin_Client(t *testing.T) { require.Equal(1, len(output)) require.Equal(stubAddress, output[0]) } + +func TestRetryJoin_Validate(t *testing.T) { + t.Parallel() + require := require.New(t) + + type validateExpect struct { + config *Config + isValid bool + reason string + } + + scenarios := []*validateExpect{ + { + config: &Config{ + Server: &ServerConfig{ + ServerJoin: &ServerJoin{ + RetryJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 0, + RetryInterval: "0", + StartJoin: []string{}, + }, + RetryJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 0, + RetryInterval: "0", + StartJoin: []string{}, + }, + }, + isValid: false, + reason: "server_join cannot be defined if retry_join is defined on the server stanza", + }, + { + config: &Config{ + Server: &ServerConfig{ + ServerJoin: &ServerJoin{ + RetryJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 0, + RetryInterval: "0", + StartJoin: []string{}, + }, + StartJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 0, + RetryInterval: "0", + RetryJoin: []string{}, + }, + }, + isValid: false, + reason: "server_join cannot be defined if start_join is defined on the server stanza", + }, + { + config: &Config{ + Server: &ServerConfig{ + ServerJoin: &ServerJoin{ + RetryJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 0, + RetryInterval: "0", + StartJoin: []string{}, + }, + StartJoin: []string{}, + RetryMaxAttempts: 1, + RetryInterval: "0", + RetryJoin: []string{}, + }, + }, + isValid: false, + reason: "server_join cannot be defined if retry_max_attempts is defined on the server stanza", + }, + { + config: &Config{ + Server: &ServerConfig{ + ServerJoin: &ServerJoin{ + RetryJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 0, + RetryInterval: "0", + 
StartJoin: []string{}, + }, + StartJoin: []string{}, + RetryMaxAttempts: 0, + RetryInterval: "1", + RetryJoin: []string{}, + }, + }, + isValid: false, + reason: "server_join cannot be defined if retry_interval is defined on the server stanza", + }, + { + config: &Config{ + Client: &ClientConfig{ + ServerJoin: &ServerJoin{ + RetryJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 0, + RetryInterval: "0", + StartJoin: []string{"127.0.0.1"}, + }, + }, + }, + isValid: false, + reason: "start_join should not be defined on the client", + }, + { + config: &Config{ + Client: &ClientConfig{ + ServerJoin: &ServerJoin{ + RetryJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 0, + RetryInterval: "0", + }, + }, + }, + isValid: true, + reason: "client server_join should be valid", + }, + { + config: &Config{ + Server: &ServerConfig{ + ServerJoin: &ServerJoin{ + RetryJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 0, + RetryInterval: "0", + StartJoin: []string{}, + }, + StartJoin: []string{}, + RetryMaxAttempts: 0, + RetryInterval: "0", + RetryJoin: []string{}, + }, + }, + isValid: true, + reason: "server server_join should be valid", + }, + { + config: &Config{ + Server: &ServerConfig{ + StartJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 1, + RetryInterval: "0", + RetryJoin: []string{}, + }, + }, + isValid: true, + reason: "server deprecated retry_join configuration should be valid", + }, + } + + joiner := retryJoiner{} + for _, scenario := range scenarios { + err := joiner.Validate(scenario.config) + require.Equal(err == nil, scenario.isValid, scenario.reason) + } +} diff --git a/website/source/docs/agent/configuration/client.html.md b/website/source/docs/agent/configuration/client.html.md index f3e9ab0366c..b08bcdd9878 100644 --- a/website/source/docs/agent/configuration/client.html.md +++ b/website/source/docs/agent/configuration/client.html.md @@ -90,21 +90,8 @@ client { receive work. This may be specified as an IP address or DNS, with or without the port. 
If the port is omitted, the default port of `4647` is used. -- `server_join` `(map[string]string` - Specifies the list of server information - to retry joining. The fields contained are: - - - `retry_join` `(array: [])` - Specifies a list of server - addresses to retry joining if the first attempt fails. The list of - addresses will be tried in the order specified, until one - succeeds. After one succeeds, no further addresses will be contacted. This is - useful for cases where we know the address will become available eventually. - - - `retry_interval` `(string: "30s")` - Specifies the time to wait between retry - join attempts. - - - `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be - made before exiting with a return code of 1. By default, this is set to 0 - which is interpreted as infinite retries. +- `server_join` ([ServerJoin][server_join]: nil) - Specifies + configuration which is specific to retry joining Nomad servers. - `state_dir` `(string: "[data_dir]/client")` - Specifies the directory to use to store client state. By default, this is - the top-level diff --git a/website/source/docs/agent/configuration/server.html.md b/website/source/docs/agent/configuration/server.html.md index 92e62f4920b..1132f1a6735 100644 --- a/website/source/docs/agent/configuration/server.html.md +++ b/website/source/docs/agent/configuration/server.html.md @@ -54,83 +54,7 @@ server { - `enabled` `(bool: false)` - Specifies if this agent should run in server mode. All other server options depend on this value being set. -- `enabled_schedulers` `(array: [all])` - Specifies which sub-schedulers - this server will handle. This can be used to restrict the evaluations that - worker threads will dequeue for processing. - -- `encrypt` `(string: "")` - Specifies the secret key to use for encryption of - Nomad server's gossip network traffic. This key must be 16 bytes that are - base64-encoded. 
The provided key is automatically persisted to the data - directory and loaded automatically whenever the agent is restarted. This means - that to encrypt Nomad server's gossip protocol, this option only needs to be - provided once on each agent's initial startup sequence. If it is provided - after Nomad has been initialized with an encryption key, then the provided key - is ignored and a warning will be displayed. See the - [Nomad encryption documentation][encryption] for more details on this option - and its impact on the cluster. - -- `node_gc_threshold` `(string: "24h")` - Specifies how long a node must be in a - terminal state before it is garbage collected and purged from the system. This - is specified using a label suffix like "30s" or "1h". - -- `job_gc_threshold` `(string: "4h")` - Specifies the minimum time a job must be - in the terminal state before it is eligible for garbage collection. This is - specified using a label suffix like "30s" or "1h". - -- `eval_gc_threshold` `(string: "1h")` - Specifies the minimum time an - evaluation must be in the terminal state before it is eligible for garbage - collection. This is specified using a label suffix like "30s" or "1h". - -- `deployment_gc_threshold` `(string: "1h")` - Specifies the minimum time a - deployment must be in the terminal state before it is eligible for garbage - collection. This is specified using a label suffix like "30s" or "1h". - -- `heartbeat_grace` `(string: "10s")` - Specifies the additional time given as a - grace period beyond the heartbeat TTL of nodes to account for network and - processing delays as well as clock skew. This is specified using a label - suffix like "30s" or "1h". - -- `min_heartbeat_ttl` `(string: "10s")` - Specifies the minimum time between - node heartbeats. This is used as a floor to prevent excessive updates. This is - specified using a label suffix like "30s" or "1h". 
Lowering the minimum TTL is - a tradeoff as it lowers failure detection time of nodes at the tradeoff of - false positives and increased load on the leader. - -- `max_heartbeats_per_second` `(float: 50.0)` - Specifies the maximum target - rate of heartbeats being processed per second. This allows the TTL to be - increased to meet the target rate. Increasing the maximum heartbeats per - second is a tradeoff as it lowers failure detection time of nodes at the - tradeoff of false positives and increased load on the leader. - -- `non_voting_server` `(bool: false)` - (Enterprise-only) Specifies whether - this server will act as a non-voting member of the cluster to help provide - read scalability. - -- `num_schedulers` `(int: [num-cores])` - Specifies the number of parallel - scheduler threads to run. This can be as many as one per core, or `0` to - disallow this server from making any scheduling decisions. This defaults to - the number of CPU cores. - -- `protocol_version` `(int: 1)` - Specifies the Nomad protocol version to use - when communicating with other Nomad servers. This value is typically not - required as the agent internally knows the latest version, but may be useful - in some upgrade scenarios. - -- `raft_protocol` `(int: 2)` - Specifies the Raft protocol version to use when - communicating with other Nomad servers. This affects available Autopilot - features and is typically not required as the agent internally knows the - latest version, but may be useful in some upgrade scenarios. - -- `redundancy_zone` `(string: "")` - (Enterprise-only) Specifies the redundancy - zone that this server will be a part of for Autopilot management. For more - information, see the [Autopilot Guide](/guides/cluster/autopilot.html). - -- `rejoin_after_leave` `(bool: false)` - Specifies if Nomad will ignore a - previous leave and attempt to rejoin the cluster when starting. 
By default, - Nomad treats leave as a permanent intent and does not attempt to join the - cluster again when starting. This flag allows the previous state to be used to - rejoin the cluster. - +- - `retry_join` `(array: [])` - Specifies a list of server addresses to retry joining if the first attempt fails. This is similar to [`start_join`](#start_join), but only invokes if the initial join attempt @@ -139,27 +63,26 @@ server { useful for cases where we know the address will become available eventually. Use `retry_join` with an array as a replacement for `start_join`, **do not use both options**. See the [server address format](#server-address-format) - section for more information on the format of the string. + section for more information on the format of the string. This field is + deprecated in favor of [server_join](#server_join). - `retry_interval` `(string: "30s")` - Specifies the time to wait between retry - join attempts. + join attempts. This field is deprecated in favor of [server_join](#server_join). - `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be made before exiting with a return code of 1. By default, this is set to 0 - which is interpreted as infinite retries. + which is interpreted as infinite retries. This field is deprecated in favor + of [server_join](#server_join). -- `server_join` `(map[string]string` - Specifies the list of server information - to retry joining. The fields contained are [retry_join](#retry_join), - [retry_interval](#retry_interval), [retry_max](#retry_max), and - [start_join](start_join). These fields will only be able to be specified in - the `server_join` stanza after Nomad 0.10 and will are deprecated as top-level - configuration on the server stanza. +- `server_join` ([ServerJoin][server_join]: nil) - Specifies + configuration which is specific to retry joining Nomad servers. - `start_join` `(array: [])` - Specifies a list of server addresses to join on startup. 
If Nomad is unable to join with any of the specified addresses, agent startup will fail. See the [server address format](#server-address-format) section for more information - on the format of the string. + on the format of the string. This field is deprecated in favor of + [server_join](#server_join). - `upgrade_version` `(string: "")` - A custom version of the format X.Y.Z to use in place of the Nomad version when custom upgrades are enabled in Autopilot. From b4a0f2cc8ca0985402231e514a3f1b15666051e8 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Tue, 22 May 2018 16:54:05 -0400 Subject: [PATCH 05/21] add further configuration validation for server_join --- command/agent/command.go | 10 ++++++++++ command/agent/retry_join.go | 2 +- command/agent/retry_join_test.go | 16 +++++++++++++++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index dc33ef16a63..19835fd1993 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -582,6 +582,11 @@ func (c *Command) Run(args []string) int { serverEnabled: true, } + if err := joiner.Validate(config); err != nil { + c.Ui.Error(err.Error()) + return 1 + } + go joiner.RetryJoin(config.Server.ServerJoin) } @@ -594,6 +599,11 @@ func (c *Command) Run(args []string) int { clientEnabled: true, } + if err := joiner.Validate(config); err != nil { + c.Ui.Error(err.Error()) + return 1 + } + go joiner.RetryJoin(config.Client.ServerJoin) } diff --git a/command/agent/retry_join.go b/command/agent/retry_join.go index 567185944da..430b7052e89 100644 --- a/command/agent/retry_join.go +++ b/command/agent/retry_join.go @@ -78,7 +78,7 @@ func (r *retryJoiner) Validate(config *Config) error { // set as this configuration is only defined for servers. 
if config.Client != nil && config.Client.ServerJoin != nil { if config.Client.ServerJoin.StartJoin != nil { - return fmt.Errorf("server_join is not supported for Nomad clients") + return fmt.Errorf("start_join is not supported for Nomad clients") } } diff --git a/command/agent/retry_join_test.go b/command/agent/retry_join_test.go index 98afe0cfbbf..f4306e12bee 100644 --- a/command/agent/retry_join_test.go +++ b/command/agent/retry_join_test.go @@ -287,7 +287,7 @@ func TestRetryJoin_Validate(t *testing.T) { }, { config: &Config{ - Client: &ClientConfig{ + Server: &ServerConfig{ ServerJoin: &ServerJoin{ RetryJoin: []string{"127.0.0.1"}, RetryMaxAttempts: 0, @@ -297,6 +297,20 @@ func TestRetryJoin_Validate(t *testing.T) { }, }, isValid: false, + reason: "start_join and retry_join should not both be defined", + }, + { + config: &Config{ + Client: &ClientConfig{ + ServerJoin: &ServerJoin{ + RetryJoin: []string{}, + RetryMaxAttempts: 0, + RetryInterval: "0", + StartJoin: []string{"127.0.0.1"}, + }, + }, + }, + isValid: false, reason: "start_join should not be defined on the client", }, { From 98f29479794314ea2735b7d61f48d03ff27286ef Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Tue, 22 May 2018 17:30:13 -0400 Subject: [PATCH 06/21] cloud auto join documentation --- .../configuration/cloud-auto-join.html.md | 133 ++++++++++++++++++ .../docs/agent/configuration/server.html.md | 106 +++++++++++++- .../agent/configuration/server_join.html.md | 126 +++++++++++++++++ 3 files changed, 358 insertions(+), 7 deletions(-) create mode 100644 website/source/docs/agent/configuration/cloud-auto-join.html.md create mode 100644 website/source/docs/agent/configuration/server_join.html.md diff --git a/website/source/docs/agent/configuration/cloud-auto-join.html.md b/website/source/docs/agent/configuration/cloud-auto-join.html.md new file mode 100644 index 00000000000..bcf7d525097 --- /dev/null +++ b/website/source/docs/agent/configuration/cloud-auto-join.html.md @@ -0,0 +1,133 
@@ +--- +layout: "docs" +page_title: "Cloud Auto Join- Agent Configuration" +sidebar_current: "docs-agent-configuration-cloud_auto_join" +description: |- +Nomad supports automatic cluster joining using cloud metadata on various providers. +--- + +# Cloud Auto-joining + +As of Nomad 0.8.4, `retry-join` accepts a unified interface using the +[go-discover](https://github.com/hashicorp/go-discover) library for doing +automatic cluster joining using cloud metadata. To use retry-join with a +supported cloud provider, specify the configuration on the command line or +configuration file as a `key=value key=value ...` string. + +Values are taken literally and must not be URL +encoded. If the values contain spaces, backslashes or double quotes then +they need to be double quoted and the usual escaping rules apply. + +```json +{ + "retry_join": ["provider=my-cloud config=val config2=\"some other val\" ..."] +} +``` + +The cloud provider-specific configurations are detailed below. This can be +combined with static IP or DNS addresses or even multiple configurations +for different providers. + +In order to use discovery behind a proxy, you will need to set +`HTTP_PROXY`, `HTTPS_PROXY` and `NO_PROXY` environment variables per +[Golang `net/http` library](https://golang.org/pkg/net/http/#ProxyFromEnvironment). + +The following sections give the options specific to each supported cloud +provider. + +### Amazon EC2 + +This returns the first private IP address of all servers in the given +region which have the given `tag_key` and `tag_value`. + + +```json +{ + "retry_join": ["provider=aws tag_key=... tag_value=..."] +} +``` + +- `provider` (required) - the name of the provider ("aws" in this case). +- `tag_key` (required) - the key of the tag to auto-join on. +- `tag_value` (required) - the value of the tag to auto-join on. +- `region` (optional) - the AWS region to authenticate in. +- `addr_type` (optional) - the type of address to discover: `private_v4`, `public_v4`, `public_v6`. 
Default is `private_v4`. (>= 1.0) +- `access_key_id` (optional) - the AWS access key for authentication (see below for more information about authenticating). +- `secret_access_key` (optional) - the AWS secret access key for authentication (see below for more information about authenticating). + +#### Authentication & Precedence + +- Static credentials `access_key_id=... secret_access_key=...` +- Environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) +- Shared credentials file (`~/.aws/credentials` or the path specified by `AWS_SHARED_CREDENTIALS_FILE`) +- ECS task role metadata (container-specific). +- EC2 instance role metadata. + + The only required IAM permission is `ec2:DescribeInstances`, and it is + recommended that you make a dedicated key used only for auto-joining. If the + region is omitted it will be discovered through the local instance's [EC2 + metadata + endpoint](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html). + +### Microsoft Azure + + This returns the first private IP address of all servers in the given region + which have the given `tag_name` and `tag_value` in the tenant and subscription, or in + the given `resource_group` of a `vm_scale_set` for Virtual Machine Scale Sets. + + + ```json +{ + "retry_join": ["provider=azure tag_name=... tag_value=... tenant_id=... client_id=... subscription_id=... secret_access_key=..."] +} +``` + +- `provider` (required) - the name of the provider ("azure" in this case). +- `tenant_id` (required) - the tenant to join machines in. +- `client_id` (required) - the client to authenticate with. +- `secret_access_key` (required) - the secret client key. + +Use these configuration parameters when using tags: +- `tag_name` - the name of the tag to auto-join on. +- `tag_value` - the value of the tag to auto-join on. 
+ +Use these configuration parameters when using Virtual Machine Scale Sets (Consul 1.0.3 and later): +- `resource_group` - the name of the resource group to filter on. +- `vm_scale_set` - the name of the virtual machine scale set to filter on. + + When using tags the only permission needed is the `ListAll` method for `NetworkInterfaces`. When using + Virtual Machine Scale Sets the only role action needed is `Microsoft.Compute/virtualMachineScaleSets/*/read`. + +### Google Compute Engine + +This returns the first private IP address of all servers in the given +project which have the given `tag_value`. + + +```json +{ +"retry_join": ["provider=gce project_name=... tag_value=..."] +} +``` + +- `provider` (required) - the name of the provider ("gce" in this case). +- `tag_value` (required) - the value of the tag to auto-join on. +- `project_name` (optional) - the name of the project to auto-join on. Discovered if not set. +- `zone_pattern` (optional) - the list of zones can be restricted through an RE2 compatible regular expression. If omitted, servers in all zones are returned. +- `credentials_file` (optional) - the credentials file for authentication. See below for more information. + +#### Authentication & Precedence + +- Use credentials from `credentials_file`, if provided. +- Use JSON file from `GOOGLE_APPLICATION_CREDENTIALS` environment variable. +- Use JSON file in a location known to the gcloud command-line tool. +- On Windows, this is `%APPDATA%/gcloud/application_default_credentials.json`. +- On other systems, `$HOME/.config/gcloud/application_default_credentials.json`. +- On Google Compute Engine, use credentials from the metadata +server. In this final case any provided scopes are ignored. + +Discovery requires a [GCE Service +Account](https://cloud.google.com/compute/docs/access/service-accounts). +Credentials are searched using the following paths, in order of precedence. 
+ + diff --git a/website/source/docs/agent/configuration/server.html.md b/website/source/docs/agent/configuration/server.html.md index 1132f1a6735..c4df16ced39 100644 --- a/website/source/docs/agent/configuration/server.html.md +++ b/website/source/docs/agent/configuration/server.html.md @@ -54,9 +54,85 @@ server { - `enabled` `(bool: false)` - Specifies if this agent should run in server mode. All other server options depend on this value being set. -- -- `retry_join` `(array: [])` - Specifies a list of server - addresses to retry joining if the first attempt fails. This is similar to +- `enabled_schedulers` `(array: [all])` - Specifies which sub-schedulers + this server will handle. This can be used to restrict the evaluations that + worker threads will dequeue for processing. + +- `encrypt` `(string: "")` - Specifies the secret key to use for encryption of + Nomad server's gossip network traffic. This key must be 16 bytes that are + base64-encoded. The provided key is automatically persisted to the data + directory and loaded automatically whenever the agent is restarted. This means + that to encrypt Nomad server's gossip protocol, this option only needs to be + provided once on each agent's initial startup sequence. If it is provided + after Nomad has been initialized with an encryption key, then the provided key + is ignored and a warning will be displayed. See the + [Nomad encryption documentation][encryption] for more details on this option + and its impact on the cluster. + +- `node_gc_threshold` `(string: "24h")` - Specifies how long a node must be in a + terminal state before it is garbage collected and purged from the system. This + is specified using a label suffix like "30s" or "1h". + +- `job_gc_threshold` `(string: "4h")` - Specifies the minimum time a job must be + in the terminal state before it is eligible for garbage collection. This is + specified using a label suffix like "30s" or "1h". 
+ +- `eval_gc_threshold` `(string: "1h")` - Specifies the minimum time an + evaluation must be in the terminal state before it is eligible for garbage + collection. This is specified using a label suffix like "30s" or "1h". + +- `deployment_gc_threshold` `(string: "1h")` - Specifies the minimum time a + deployment must be in the terminal state before it is eligible for garbage + collection. This is specified using a label suffix like "30s" or "1h". + +- `heartbeat_grace` `(string: "10s")` - Specifies the additional time given as a + grace period beyond the heartbeat TTL of nodes to account for network and + processing delays as well as clock skew. This is specified using a label + suffix like "30s" or "1h". + +- `min_heartbeat_ttl` `(string: "10s")` - Specifies the minimum time between + node heartbeats. This is used as a floor to prevent excessive updates. This is + specified using a label suffix like "30s" or "1h". Lowering the minimum TTL is + a tradeoff as it lowers failure detection time of nodes at the tradeoff of + false positives and increased load on the leader. + +- `max_heartbeats_per_second` `(float: 50.0)` - Specifies the maximum target + rate of heartbeats being processed per second. This allows the TTL to be + increased to meet the target rate. Increasing the maximum heartbeats per + second is a tradeoff as it lowers failure detection time of nodes at the + tradeoff of false positives and increased load on the leader. + +- `non_voting_server` `(bool: false)` - (Enterprise-only) Specifies whether + this server will act as a non-voting member of the cluster to help provide + read scalability. + +- `num_schedulers` `(int: [num-cores])` - Specifies the number of parallel + scheduler threads to run. This can be as many as one per core, or `0` to + disallow this server from making any scheduling decisions. This defaults to + the number of CPU cores. 
+ +- `protocol_version` `(int: 1)` - Specifies the Nomad protocol version to use + when communicating with other Nomad servers. This value is typically not + required as the agent internally knows the latest version, but may be useful + in some upgrade scenarios. + +- `raft_protocol` `(int: 2)` - Specifies the Raft protocol version to use when + communicating with other Nomad servers. This affects available Autopilot + features and is typically not required as the agent internally knows the + latest version, but may be useful in some upgrade scenarios. + +- `redundancy_zone` `(string: "")` - (Enterprise-only) Specifies the redundancy + zone that this server will be a part of for Autopilot management. For more + information, see the [Autopilot Guide](/guides/cluster/autopilot.html). + +- `rejoin_after_leave` `(bool: false)` - Specifies if Nomad will ignore a + previous leave and attempt to rejoin the cluster when starting. By default, + Nomad treats leave as a permanent intent and does not attempt to join the + cluster again when starting. This flag allows the previous state to be used to + rejoin the cluster. + +- `retry_join` `(array: [])` - Specifies a list of server addresses to + retry joining if the first attempt fails. This is similar to [`start_join`](#start_join), but only invokes if the initial join attempt fails. The list of addresses will be tried in the order specified, until one succeeds. After one succeeds, no further addresses will be contacted. This is @@ -66,17 +142,22 @@ server { section for more information on the format of the string. This field is deprecated in favor of [server_join](#server_join). + Note that `retry_join` can be defined for only servers as a command-line + flag (clients are only able to define via the client configuration). + + ```sh + $ nomad agent -retry-join "127.0.0.1:4648" + ``` + - `retry_interval` `(string: "30s")` - Specifies the time to wait between retry - join attempts. 
This field is deprecated in favor of [server_join](#server_join). + join attempts. This field is deprecated in favor of + [server_join](#server_join). - `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be made before exiting with a return code of 1. By default, this is set to 0 which is interpreted as infinite retries. This field is deprecated in favor of [server_join](#server_join). -- `server_join` ([ServerJoin][server_join]: nil) - Specifies - configuration which is specific to retry joining Nomad servers. - - `start_join` `(array: [])` - Specifies a list of server addresses to join on startup. If Nomad is unable to join with any of the specified addresses, agent startup will fail. See the @@ -126,6 +207,17 @@ unless configured otherwise: nomad-01.company.local => nomad-01.company.local:4648 ``` +#### Via the go-discover interface + +As of Nomad 0.9, `retry-join` accepts a unified interface using the +[go-discover](https://github.com/hashicorp/go-discover) library for doing +automated cluster joining using cloud metadata. + +``` +"provider=aws tag_key=..." => 1.2.3.4:4648 +``` + + ## `server` Examples ### Common Setup diff --git a/website/source/docs/agent/configuration/server_join.html.md b/website/source/docs/agent/configuration/server_join.html.md new file mode 100644 index 00000000000..fd395062661 --- /dev/null +++ b/website/source/docs/agent/configuration/server_join.html.md @@ -0,0 +1,126 @@ +--- +layout: "docs" +page_title: "server_join Stanza - Agent Configuration" +sidebar_current: "docs-agent-configuration-server_join" +description: |- + The "server_join" stanza configures the Nomad agent to enable retry_join logic for connecting to Nomad servers. +--- + +# `server_join` Stanza + + + + + + +
Placement + **acl** +
+ +The `server_join` stanza configures the Nomad agent to enable retry_join logic for connecting to Nomad servers. + +```hcl +server_join { + retry_join = [ "1.1.1.1", "2.2.2.2" ] + start_join = [ "1.1.1.1", "2.2.2.2" ] + retry_max = 3 + retry_interval = "15s" +} +``` + +## `server_join` Parameters + +- `retry_join` `(array: [])` - Specifies a list of server + addresses to retry joining if the first attempt fails. This is similar to + [`start_join`](#start_join), but only invokes if the initial join attempt + fails, and is available to both Nomad servers and clients, while + `start_join` is only defined for Nomad servers. The list of addresses will + be tried in the order specified, until one succeeds. After one succeeds, no + further addresses will be contacted. This is + useful for cases where we know the address will become available eventually. + Use `retry_join` with an array as a replacement for `start_join`, **do not use + both options**. + +Address format includes both using IP addresses as well as an interface to the +[go-discover](https://github.com/hashicorp/go-discover) library for doing +automated cluster joining using cloud metadata. +See [Cloud Auto-join](/docs/agent/configuration/cloud-auto-join.html) for details. +``` +server_join { + retry_join = [ "1.1.1.1", "2.2.2.2" ] +} +``` +Using the `go-discover` interface, this can be defined both in a client or +server configuration as well as provided as a command-line argument. +``` +server_join { + retry_join = [ "provider=aws tag_key=..." ] +} +``` +See the [server address format](#server-address-format) for more information +about expected server address formats. + +- `retry_interval` `(string: "30s")` - Specifies the time to wait between retry + join attempts. + +- `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be + made before exiting with a return code of 1. By default, this is set to 0 + which is interpreted as infinite retries. 
+ +- `start_join` `(array: [])` - Specifies a list of server addresses to + join on startup. If Nomad is unable to join with any of the specified + addresses, agent startup will fail. See the + [server address format](#server-address-format) section for more information + on the format of the string. This field is defined only for Nomad servers and + will result in a configuration parse error if included in a client + configuration. + +### Server Address Format + +This section describes the acceptable syntax and format for describing the +location of a Nomad server. There are many ways to reference a Nomad server, +including directly by IP address and resolving through DNS. + +#### Directly via IP Address + +It is possible to address another Nomad server using its IP address. This is +done in the `ip:port` format, such as: + +``` +1.2.3.4:5678 +``` + +If the port option is omitted, it defaults to the Serf port, which is 4648 +unless configured otherwise: + +``` +1.2.3.4 => 1.2.3.4:4648 +``` + +#### Via Domains or DNS + +It is possible to address another Nomad server using its DNS address. This is +done in the `address:port` format, such as: + +``` +nomad-01.company.local:5678 +``` + +If the port option is omitted, it defaults to the Serf port, which is 4648 +unless configured otherwise: + +``` +nomad-01.company.local => nomad-01.company.local:4648 +``` + +#### Via the go-discover interface + +As of Nomad 0.9, `retry-join` accepts a unified interface using the +[go-discover](https://github.com/hashicorp/go-discover) library for doing +automated cluster joining using cloud metadata. + +``` +"provider=aws tag_key=..." => 1.2.3.4:4648 + +See ([CloudAutoJoin][cloud_auto_join]: nil) for further information. 
+``` From ef90b44927ed09efd8b261c9d11f1e311ea4ccda Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Wed, 23 May 2018 19:24:12 -0400 Subject: [PATCH 07/21] add to sidebar navigation clean up styling of new pages --- .../docs/agent/configuration/cloud-auto-join.html.md | 6 +++--- .../source/docs/agent/configuration/server_join.html.md | 9 --------- website/source/layouts/docs.erb | 6 ++++++ 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/website/source/docs/agent/configuration/cloud-auto-join.html.md b/website/source/docs/agent/configuration/cloud-auto-join.html.md index bcf7d525097..d912317e49b 100644 --- a/website/source/docs/agent/configuration/cloud-auto-join.html.md +++ b/website/source/docs/agent/configuration/cloud-auto-join.html.md @@ -1,9 +1,9 @@ --- layout: "docs" -page_title: "Cloud Auto Join- Agent Configuration" -sidebar_current: "docs-agent-configuration-cloud_auto_join" +page_title: "Cloud Auto-join" +sidebar_current: "docs-agent-cloud-auto-join" description: |- -Nomad supports automatic cluster joining using cloud metadata on various providers. + Nomad supports automatic cluster joining using cloud metadata on various providers. --- # Cloud Auto-joining diff --git a/website/source/docs/agent/configuration/server_join.html.md b/website/source/docs/agent/configuration/server_join.html.md index fd395062661..1f6d89d00fb 100644 --- a/website/source/docs/agent/configuration/server_join.html.md +++ b/website/source/docs/agent/configuration/server_join.html.md @@ -8,15 +8,6 @@ description: |- # `server_join` Stanza - - - - - -
Placement - **acl** -
- The `server_join` stanza configures the Nomad agent to enable retry_join logic for connecting to Nomad servers. ```hcl diff --git a/website/source/layouts/docs.erb b/website/source/layouts/docs.erb index b0e3b7250fc..1d40a927d89 100644 --- a/website/source/layouts/docs.erb +++ b/website/source/layouts/docs.erb @@ -405,6 +405,9 @@
  • > autopilot
  • +
  • > + cloud auto join +
  • > client
  • @@ -417,6 +420,9 @@
  • > server
  • +
  • > + server join +
  • > telemetry
  • From 023cc2c3b7704f348cf4d5677da7876cc53eeae6 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Thu, 24 May 2018 17:19:51 -0400 Subject: [PATCH 08/21] set retryInterval and other code feedback --- command/agent/command.go | 8 ---- command/agent/config.go | 17 +++++++- command/agent/config_test.go | 67 ++++++++++++++++++++++++++++++++ command/agent/retry_join.go | 27 +++++++++++++ command/agent/retry_join_test.go | 42 +++++++++++++++++++- 5 files changed, 151 insertions(+), 10 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index 19835fd1993..309ab6c69df 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -267,14 +267,6 @@ func (c *Command) readConfig() *Config { } } - // Parse the RetryInterval. - dur, err := time.ParseDuration(config.Server.RetryInterval) - if err != nil { - c.Ui.Error(fmt.Sprintf("Error parsing retry interval: %s", err)) - return nil - } - config.Server.retryInterval = dur - // Check that the server is running in at least one mode. if !(config.Server.Enabled || config.Client.Enabled) { c.Ui.Error("Must specify either server, client or dev mode for the agent.") diff --git a/command/agent/config.go b/command/agent/config.go index c744e550368..3c7bf3b93f7 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -381,6 +381,21 @@ type ServerJoin struct { retryInterval time.Duration `mapstructure:"-"` } +func (s *ServerJoin) Merge(b *ServerJoin) { + if len(b.StartJoin) != 0 { + s.StartJoin = b.StartJoin + } + if len(b.RetryJoin) != 0 { + s.RetryJoin = b.RetryJoin + } + if b.RetryMaxAttempts != 0 { + s.RetryMaxAttempts = b.RetryMaxAttempts + } + if b.RetryInterval != "" { + s.RetryInterval = b.RetryInterval + } +} + // EncryptBytes returns the encryption key configured. 
func (s *ServerConfig) EncryptBytes() ([]byte, error) { return base64.StdEncoding.DecodeString(s.EncryptKey) @@ -1089,7 +1104,7 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { result.EncryptKey = b.EncryptKey } if b.ServerJoin != nil { - result.ServerJoin = b.ServerJoin + result.ServerJoin.Merge(b.ServerJoin) } // Add the schedulers diff --git a/command/agent/config_test.go b/command/agent/config_test.go index d0be57cffcc..33ccd8d26a4 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -14,6 +14,7 @@ import ( "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs/config" + "github.com/stretchr/testify/require" ) var ( @@ -907,3 +908,69 @@ func TestIsMissingPort(t *testing.T) { t.Errorf("expected no error, but got %v", err) } } + +func TestMergeServerJoin(t *testing.T) { + require := require.New(t) + + { + retryJoin := []string{"127.0.0.1", "127.0.0.2"} + startJoin := []string{"127.0.0.1", "127.0.0.2"} + retryMaxAttempts := 1 + retryInterval := "1" + + a := &ServerJoin{ + RetryJoin: retryJoin, + StartJoin: startJoin, + RetryMaxAttempts: retryMaxAttempts, + RetryInterval: retryInterval, + } + b := &ServerJoin{} + + a.Merge(b) + require.Equal(a.RetryJoin, retryJoin) + require.Equal(a.StartJoin, startJoin) + require.Equal(a.RetryMaxAttempts, retryMaxAttempts) + require.Equal(a.RetryInterval, retryInterval) + } + { + retryJoin := []string{"127.0.0.1", "127.0.0.2"} + startJoin := []string{"127.0.0.1", "127.0.0.2"} + retryMaxAttempts := 1 + retryInterval := "1" + + a := &ServerJoin{} + b := &ServerJoin{ + RetryJoin: retryJoin, + StartJoin: startJoin, + RetryMaxAttempts: retryMaxAttempts, + RetryInterval: retryInterval, + } + + a.Merge(b) + require.Equal(a.RetryJoin, retryJoin) + require.Equal(a.StartJoin, startJoin) + require.Equal(a.RetryMaxAttempts, retryMaxAttempts) + require.Equal(a.RetryInterval, retryInterval) + } + { + retryJoin := []string{"127.0.0.1", 
"127.0.0.2"} + startJoin := []string{"127.0.0.1", "127.0.0.2"} + retryMaxAttempts := 1 + retryInterval := "1" + + a := &ServerJoin{ + RetryJoin: retryJoin, + StartJoin: startJoin, + } + b := &ServerJoin{ + RetryMaxAttempts: retryMaxAttempts, + RetryInterval: retryInterval, + } + + a.Merge(b) + require.Equal(a.RetryJoin, retryJoin) + require.Equal(a.StartJoin, startJoin) + require.Equal(a.RetryMaxAttempts, retryMaxAttempts) + require.Equal(a.RetryInterval, retryInterval) + } +} diff --git a/command/agent/retry_join.go b/command/agent/retry_join.go index 430b7052e89..315fe52650a 100644 --- a/command/agent/retry_join.go +++ b/command/agent/retry_join.go @@ -82,6 +82,33 @@ func (r *retryJoiner) Validate(config *Config) error { } } + if config.Server != nil { + dur, err := time.ParseDuration(config.Server.RetryInterval) + if err != nil { + return fmt.Errorf("Error parsing server retry interval: %s", err) + } else { + config.Server.retryInterval = dur + } + + if config.Server.ServerJoin != nil { + dur, err := time.ParseDuration(config.Server.RetryInterval) + if err != nil { + return fmt.Errorf("Error parsing server retry interval: %s", err) + } else { + config.Server.ServerJoin.retryInterval = dur + } + } + } + + if config.Client != nil && config.Client.ServerJoin != nil { + dur, err := time.ParseDuration(config.Client.ServerJoin.RetryInterval) + if err != nil { + return fmt.Errorf("Error parsing retry interval: %s", err) + } else { + config.Client.ServerJoin.retryInterval = dur + } + } + return nil } diff --git a/command/agent/retry_join_test.go b/command/agent/retry_join_test.go index f4306e12bee..a7e129aca1f 100644 --- a/command/agent/retry_join_test.go +++ b/command/agent/retry_join_test.go @@ -349,13 +349,53 @@ func TestRetryJoin_Validate(t *testing.T) { Server: &ServerConfig{ StartJoin: []string{"127.0.0.1"}, RetryMaxAttempts: 1, - RetryInterval: "0", + RetryInterval: "3s", RetryJoin: []string{}, }, }, isValid: true, reason: "server deprecated retry_join 
configuration should be valid", }, + { + config: &Config{ + Server: &ServerConfig{ + StartJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 1, + RetryInterval: "invalid!TimeInterval", + RetryJoin: []string{}, + }, + }, + isValid: false, + reason: "invalid time interval", + }, + { + config: &Config{ + Server: &ServerConfig{ + ServerJoin: &ServerJoin{ + StartJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 1, + RetryInterval: "invalid!TimeInterval", + RetryJoin: []string{}, + }, + }, + }, + isValid: false, + reason: "invalid time interval", + }, + { + config: &Config{ + Client: &ClientConfig{ + ServerJoin: &ServerJoin{ + StartJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 1, + RetryInterval: "invalid!TimeInterval", + RetryJoin: []string{}, + }, + }, + }, + isValid: false, + reason: "invalid time interval", + }, } joiner := retryJoiner{} From e93dc5326936eed1195d012a5874e45eb87f7426 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Thu, 24 May 2018 17:46:52 -0400 Subject: [PATCH 09/21] update documentation --- .../docs/agent/configuration/client.html.md | 3 +- .../docs/agent/configuration/server.html.md | 72 +++---------------- .../agent/configuration/server_join.html.md | 6 +- .../source/docs/commands/agent.html.md.erb | 7 ++ 4 files changed, 23 insertions(+), 65 deletions(-) diff --git a/website/source/docs/agent/configuration/client.html.md b/website/source/docs/agent/configuration/client.html.md index b08bcdd9878..ae46e1f9301 100644 --- a/website/source/docs/agent/configuration/client.html.md +++ b/website/source/docs/agent/configuration/client.html.md @@ -90,7 +90,7 @@ client { receive work. This may be specified as an IP address or DNS, with or without the port. If the port is omitted, the default port of `4647` is used. -- `server_join` ([ServerJoin][server_join]: nil) - Specifies +- `server_join` [ServerJoin][server_join] - Specifies configuration which is specific to retry joining Nomad servers. 
- `state_dir` `(string: "[data_dir]/client")` - Specifies the directory to use @@ -349,3 +349,4 @@ client { } } ``` +[server_join]: /docs/agent/configuration/server_join.html "Server Join" diff --git a/website/source/docs/agent/configuration/server.html.md b/website/source/docs/agent/configuration/server.html.md index c4df16ced39..ca229dc2fc2 100644 --- a/website/source/docs/agent/configuration/server.html.md +++ b/website/source/docs/agent/configuration/server.html.md @@ -131,6 +131,15 @@ server { cluster again when starting. This flag allows the previous state to be used to rejoin the cluster. +- `server_join` [ServerJoin][server_join] - Specifies + configuration which is specific to retry joining Nomad servers. + +- `upgrade_version` `(string: "")` - A custom version of the format X.Y.Z to use + in place of the Nomad version when custom upgrades are enabled in Autopilot. + For more information, see the [Autopilot Guide](/guides/cluster/autopilot.html). + +### Deprecated Parameters + - `retry_join` `(array: [])` - Specifies a list of server addresses to retry joining if the first attempt fails. This is similar to [`start_join`](#start_join), but only invokes if the initial join attempt @@ -138,17 +147,10 @@ server { succeeds. After one succeeds, no further addresses will be contacted. This is useful for cases where we know the address will become available eventually. Use `retry_join` with an array as a replacement for `start_join`, **do not use - both options**. See the [server address format](#server-address-format) + both options**. See the [server_join](#server_join) section for more information on the format of the string. This field is deprecated in favor of [server_join](#server_join). - Note that `retry_join` can be defined for only servers as a command-line - flag (clients are only able to define via the client configuration). 
- - ```sh - $ nomad agent -retry-join "127.0.0.1:4648" - ``` - - `retry_interval` `(string: "30s")` - Specifies the time to wait between retry join attempts. This field is deprecated in favor of [server_join](#server_join). @@ -165,59 +167,6 @@ server { on the format of the string. This field is deprecated in favor of [server_join](#server_join). -- `upgrade_version` `(string: "")` - A custom version of the format X.Y.Z to use - in place of the Nomad version when custom upgrades are enabled in Autopilot. - For more information, see the [Autopilot Guide](/guides/cluster/autopilot.html). - -### Server Address Format - -This section describes the acceptable syntax and format for describing the -location of a Nomad server. There are many ways to reference a Nomad server, -including directly by IP address and resolving through DNS. - -#### Directly via IP Address - -It is possible to address another Nomad server using its IP address. This is -done in the `ip:port` format, such as: - -``` -1.2.3.4:5678 -``` - -If the port option is omitted, it defaults to the Serf port, which is 4648 -unless configured otherwise: - -``` -1.2.3.4 => 1.2.3.4:4648 -``` - -#### Via Domains or DNS - -It is possible to address another Nomad server using its DNS address. This is -done in the `address:port` format, such as: - -``` -nomad-01.company.local:5678 -``` - -If the port option is omitted, it defaults to the Serf port, which is 4648 -unless configured otherwise: - -``` -nomad-01.company.local => nomad-01.company.local:4648 -``` - -#### Via the go-discover interface - -As of Nomad 0.9, `retry-join` accepts a unified interface using the -[go-discover](https://github.com/hashicorp/go-discover) library for doing -automated cluster joining using cloud metadata. - -``` -"provider=aws tag_key=..." 
=> 1.2.3.4:4648 -``` - - ## `server` Examples ### Common Setup @@ -264,3 +213,4 @@ server { ``` [encryption]: /docs/agent/encryption.html "Nomad Agent Encryption" +[server_join]: /docs/agent/configuration/server_join.html "Server Join" diff --git a/website/source/docs/agent/configuration/server_join.html.md b/website/source/docs/agent/configuration/server_join.html.md index 1f6d89d00fb..8b6881685f5 100644 --- a/website/source/docs/agent/configuration/server_join.html.md +++ b/website/source/docs/agent/configuration/server_join.html.md @@ -3,12 +3,12 @@ layout: "docs" page_title: "server_join Stanza - Agent Configuration" sidebar_current: "docs-agent-configuration-server_join" description: |- - The "server_join" stanza configures the Nomad agent to enable retry_join logic for connecting to Nomad servers. + The server_join stanza specifies how the Nomad agent will discover and connect to Nomad servers. --- # `server_join` Stanza -The `server_join` stanza configures the Nomad agent to enable retry_join logic for connecting to Nomad servers. +The server_join stanza specifies how the Nomad agent will discover and connect to Nomad servers. ```hcl server_join { @@ -106,7 +106,7 @@ nomad-01.company.local => nomad-01.company.local:4648 #### Via the go-discover interface -As of Nomad 0.9, `retry-join` accepts a unified interface using the +As of Nomad 0.8.4, `retry-join` accepts a unified interface using the [go-discover](https://github.com/hashicorp/go-discover) library for doing automated cluster joining using cloud metadata. diff --git a/website/source/docs/commands/agent.html.md.erb b/website/source/docs/commands/agent.html.md.erb index 58dc9bc0327..ea3b4f40b64 100644 --- a/website/source/docs/commands/agent.html.md.erb +++ b/website/source/docs/commands/agent.html.md.erb @@ -72,6 +72,13 @@ via CLI arguments. The `agent` command accepts the following arguments: * `-rejoin`: Equivalent to the [rejoin_after_leave](#rejoin_after_leave) config option. 
* `-retry-interval`: Equivalent to the [retry_interval](#retry_interval) config option. * `-retry-join`: Similar to `-join` but allows retrying a join if the first attempt fails. + +Note that `retry_join` can be defined for only servers as a command-line +flag (clients are only able to define via the client configuration). +```sh +$ nomad agent -retry-join "127.0.0.1:4648" +` + * `-retry-max`: Similar to the [retry_max](#retry_max) config option. * `-server`: Enable server mode on the local agent. * `-servers=`: Equivalent to the Client [servers](#servers) config From 58a61313d42f4e37a6371c4c470832745365c1d3 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Thu, 24 May 2018 19:56:09 -0400 Subject: [PATCH 10/21] links to server_join docs --- website/source/docs/agent/configuration/client.html.md | 2 +- website/source/docs/agent/configuration/server.html.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/website/source/docs/agent/configuration/client.html.md b/website/source/docs/agent/configuration/client.html.md index ae46e1f9301..3e2f2d64ee3 100644 --- a/website/source/docs/agent/configuration/client.html.md +++ b/website/source/docs/agent/configuration/client.html.md @@ -349,4 +349,4 @@ client { } } ``` -[server_join]: /docs/agent/configuration/server_join.html "Server Join" +[server_join]: /docs/agent/configuration/server_join.html.md "Server Join" diff --git a/website/source/docs/agent/configuration/server.html.md b/website/source/docs/agent/configuration/server.html.md index ca229dc2fc2..248a9e6d1b4 100644 --- a/website/source/docs/agent/configuration/server.html.md +++ b/website/source/docs/agent/configuration/server.html.md @@ -131,7 +131,7 @@ server { cluster again when starting. This flag allows the previous state to be used to rejoin the cluster. 
-- `server_join` [ServerJoin][server_join] - Specifies +- `server_join` - Specifies the [Server Join][server_join] stanza for configuration which is specific to retry joining Nomad servers. - `upgrade_version` `(string: "")` - A custom version of the format X.Y.Z to use @@ -213,4 +213,4 @@ server { ``` [encryption]: /docs/agent/encryption.html "Nomad Agent Encryption" -[server_join]: /docs/agent/configuration/server_join.html "Server Join" +[server_join]: /docs/agent/configuration/server_join.html.md "Server Join" From 02a64d0760751cb78052334d250b4d7772ad91d2 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Fri, 25 May 2018 16:04:32 -0400 Subject: [PATCH 11/21] fix doc links and other improvements --- .../docs/agent/configuration/client.html.md | 10 ++-- .../configuration/cloud-auto-join.html.md | 7 +-- .../docs/agent/configuration/server.html.md | 22 +++++---- .../agent/configuration/server_join.html.md | 49 ++++++++++--------- website/source/guides/cluster/manual.html.md | 4 +- website/source/layouts/docs.erb | 4 +- 6 files changed, 54 insertions(+), 42 deletions(-) diff --git a/website/source/docs/agent/configuration/client.html.md b/website/source/docs/agent/configuration/client.html.md index 3e2f2d64ee3..4cb00e6c0b1 100644 --- a/website/source/docs/agent/configuration/client.html.md +++ b/website/source/docs/agent/configuration/client.html.md @@ -90,7 +90,7 @@ client { receive work. This may be specified as an IP address or DNS, with or without the port. If the port is omitted, the default port of `4647` is used. -- `server_join` [ServerJoin][server_join] - Specifies +- `server_join` ([server_join](#server-join): nil) - Specifies configuration which is specific to retry joining Nomad servers. - `state_dir` `(string: "[data_dir]/client")` - Specifies the directory to use @@ -310,7 +310,11 @@ cluster. 
```hcl client { enabled = true - servers = ["1.2.3.4:4647", "5.6.7.8:4647"] + server_join { + retry_join = [ "1.1.1.1", "2.2.2.2" ] + retry_max = 3 + retry_interval = "15s" + } } ``` @@ -349,4 +353,4 @@ client { } } ``` -[server_join]: /docs/agent/configuration/server_join.html.md "Server Join" +[server-join]: /docs/agent/configuration/server_join.html "Server Join" diff --git a/website/source/docs/agent/configuration/cloud-auto-join.html.md b/website/source/docs/agent/configuration/cloud-auto-join.html.md index d912317e49b..71c9bc847d4 100644 --- a/website/source/docs/agent/configuration/cloud-auto-join.html.md +++ b/website/source/docs/agent/configuration/cloud-auto-join.html.md @@ -3,7 +3,7 @@ layout: "docs" page_title: "Cloud Auto-join" sidebar_current: "docs-agent-cloud-auto-join" description: |- - Nomad supports automatic cluster joining using cloud metadata on various providers. + Nomad supports automatic cluster joining using cloud metadata from various cloud providers --- # Cloud Auto-joining @@ -32,8 +32,9 @@ In order to use discovery behind a proxy, you will need to set `HTTP_PROXY`, `HTTPS_PROXY` and `NO_PROXY` environment variables per [Golang `net/http` library](https://golang.org/pkg/net/http/#ProxyFromEnvironment). -The following sections give the options specific to each supported cloud -provider. +The following sections give the options specific to a subset of supported cloud +provider. For information on all providers, see further documentation in +[go-discover](https://github.com/hashicorp/go-discover). ### Amazon EC2 diff --git a/website/source/docs/agent/configuration/server.html.md b/website/source/docs/agent/configuration/server.html.md index 248a9e6d1b4..ede0a1677f2 100644 --- a/website/source/docs/agent/configuration/server.html.md +++ b/website/source/docs/agent/configuration/server.html.md @@ -28,7 +28,11 @@ join failures, and more. 
server { enabled = true bootstrap_expect = 3 - retry_join = ["1.2.3.4", "5.6.7.8"] + server_join { + retry_join = [ "1.1.1.1", "2.2.2.2" ] + retry_max = 3 + retry_interval = "15s" + } } ``` @@ -131,8 +135,8 @@ server { cluster again when starting. This flag allows the previous state to be used to rejoin the cluster. -- `server_join` - Specifies the [Server Join][server_join] stanza for - configuration which is specific to retry joining Nomad servers. +- `server_join` ([server_join](#server-join): nil) - Specifies + configuration for retry joining Nomad servers if the first attempt fails. - `upgrade_version` `(string: "")` - A custom version of the format X.Y.Z to use in place of the Nomad version when custom upgrades are enabled in Autopilot. @@ -147,25 +151,25 @@ server { succeeds. After one succeeds, no further addresses will be contacted. This is useful for cases where we know the address will become available eventually. Use `retry_join` with an array as a replacement for `start_join`, **do not use - both options**. See the [server_join](#server_join) + both options**. See the [server_join](#server-join) section for more information on the format of the string. This field is - deprecated in favor of [server_join](#server_join). + deprecated in favor of [server_join](#server-join). - `retry_interval` `(string: "30s")` - Specifies the time to wait between retry join attempts. This field is deprecated in favor of - [server_join](#server_join). + [server_join](#server-join). - `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be made before exiting with a return code of 1. By default, this is set to 0 which is interpreted as infinite retries. This field is deprecated in favor - of [server_join](#server_join). + of [server_join](#server-join). - `start_join` `(array: [])` - Specifies a list of server addresses to join on startup. If Nomad is unable to join with any of the specified addresses, agent startup will fail. 
See the [server address format](#server-address-format) section for more information on the format of the string. This field is deprecated in favor of - [server_join](#server_join). + [server_join](#server-join). ## `server` Examples @@ -213,4 +217,4 @@ server { ``` [encryption]: /docs/agent/encryption.html "Nomad Agent Encryption" -[server_join]: /docs/agent/configuration/server_join.html.md "Server Join" +[server-join]: /docs/agent/configuration/server_join.html.md "Server Join" diff --git a/website/source/docs/agent/configuration/server_join.html.md b/website/source/docs/agent/configuration/server_join.html.md index 8b6881685f5..eb65f62fdc2 100644 --- a/website/source/docs/agent/configuration/server_join.html.md +++ b/website/source/docs/agent/configuration/server_join.html.md @@ -13,7 +13,6 @@ The server_join stanza specifies how the Nomad agent will discover and connect t ```hcl server_join { retry_join = [ "1.1.1.1", "2.2.2.2" ] - start_join = [ "1.1.1.1", "2.2.2.2" ] retry_max = 3 retry_interval = "15s" } @@ -21,7 +20,7 @@ server_join { ## `server_join` Parameters -- `retry_join` `(array: [])` - Specifies a list of server +- `retry_join` `(array: [])` - Specifies a list of server addresses to retry joining if the first attempt fails. This is similar to [`start_join`](#start_join), but only invokes if the initial join attempt fails, and is available to both Nomad servers and clients, while @@ -32,33 +31,35 @@ server_join { Use `retry_join` with an array as a replacement for `start_join`, **do not use both options**. -Address format includes both using IP addresses as well as an interface to the -[go-discover](https://github.com/hashicorp/go-discover) library for doing -automated cluster joining using cloud metadata. -See Cloud Auto Join`([CloudAutoJoin][cloud_auto_join]: nil). 
-``` -server_join { - retry_join = [ "1.1.1.1", "2.2.2.2" ] -} -``` -Using the `go-discover` interface, this can be defined both in a client or -server configuration as well as provided as a command-line argument. -``` -server_join { - retry_join = [ "provider=aws tag_key=..." ] -} -``` -See the [server address format](#server-address-format) for more information -about expected server address formats. - -- `retry_interval` `(string: "30s")` - Specifies the time to wait between retry + Address format includes both using IP addresses as well as an interface to the + [go-discover](https://github.com/hashicorp/go-discover) library for doing + automated cluster joining using cloud metadata. + See Cloud Auto Join`([CloudAutoJoin][cloud_auto_join]: nil). + + ``` + server_join { + retry_join = [ "1.1.1.1", "2.2.2.2" ] + } + ``` + + Using the `go-discover` interface, this can be defined both in a client or + server configuration as well as provided as a command-line argument. + ``` + server_join { + retry_join = [ "provider=aws tag_key=..." ] + } + ``` + See the [server address format](#server-address-format) for more information + about expected server address formats. + +- `retry_interval` `(string: "30s")` - Specifies the time to wait between retry join attempts. -- `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be +- `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be made before exiting with a return code of 1. By default, this is set to 0 which is interpreted as infinite retries. -- `start_join` `(array: [])` - Specifies a list of server addresses to +- `start_join` `(array: [])` - Specifies a list of server addresses to join on startup. If Nomad is unable to join with any of the specified addresses, agent startup will fail. 
See the [server address format](#server-address-format) section for more information diff --git a/website/source/guides/cluster/manual.html.md b/website/source/guides/cluster/manual.html.md index e54504beda1..cddd390dce7 100644 --- a/website/source/guides/cluster/manual.html.md +++ b/website/source/guides/cluster/manual.html.md @@ -31,7 +31,9 @@ server { bootstrap_expect = 3 # This is the IP address of the first server we provisioned - retry_join = [":4648"] + server_join { + retry_join = [":4648"] + } } ``` diff --git a/website/source/layouts/docs.erb b/website/source/layouts/docs.erb index 1d40a927d89..2fa5684181f 100644 --- a/website/source/layouts/docs.erb +++ b/website/source/layouts/docs.erb @@ -420,8 +420,8 @@
  • > server
  • -
  • > - server join +
  • > + server_join
  • > telemetry From fa1d2de5dfd4acfb175a0b1a1318863cbb9af472 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Fri, 25 May 2018 17:12:13 -0400 Subject: [PATCH 12/21] RetryInterval should be a time.Duration --- command/agent/command.go | 10 ++++- command/agent/config.go | 5 +-- command/agent/config_parse_test.go | 4 +- command/agent/config_test.go | 12 +++--- command/agent/retry_join.go | 29 +------------- command/agent/retry_join_test.go | 62 ++++++------------------------ 6 files changed, 32 insertions(+), 90 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index 309ab6c69df..b9b85ed667a 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -267,6 +267,14 @@ func (c *Command) readConfig() *Config { } } + // COMPAT: Remove in 0.10. Parse the RetryInterval + dur, err := time.ParseDuration(config.Server.RetryInterval) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error parsing retry interval: %s", err)) + return nil + } + config.Server.retryInterval = dur + // Check that the server is running in at least one mode. if !(config.Server.Enabled || config.Client.Enabled) { c.Ui.Error("Must specify either server, client or dev mode for the agent.") @@ -560,7 +568,7 @@ func (c *Command) Run(args []string) int { RetryJoin: config.Server.RetryJoin, StartJoin: config.Server.StartJoin, RetryMaxAttempts: config.Server.RetryMaxAttempts, - RetryInterval: config.Server.RetryInterval, + RetryInterval: config.Server.retryInterval, } go joiner.RetryJoin(serverJoinInfo) } diff --git a/command/agent/config.go b/command/agent/config.go index 3c7bf3b93f7..0398b5004ec 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -377,8 +377,7 @@ type ServerJoin struct { // RetryInterval specifies the amount of time to wait in between join // attempts on agent start. The minimum allowed value is 1 second and // the default is 30s. 
- RetryInterval string `mapstructure:"retry_interval"` - retryInterval time.Duration `mapstructure:"-"` + RetryInterval time.Duration `mapstructure:"retry_interval"` } func (s *ServerJoin) Merge(b *ServerJoin) { @@ -391,7 +390,7 @@ func (s *ServerJoin) Merge(b *ServerJoin) { if b.RetryMaxAttempts != 0 { s.RetryMaxAttempts = b.RetryMaxAttempts } - if b.RetryInterval != "" { + if b.RetryInterval != 0 { s.RetryInterval = b.RetryInterval } } diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index 95387b6a8c0..f1a2175ee2f 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -49,7 +49,7 @@ func TestConfig_Parse(t *testing.T) { NodeClass: "linux-medium-64bit", ServerJoin: &ServerJoin{ RetryJoin: []string{"1.1.1.1", "2.2.2.2"}, - RetryInterval: "15s", + RetryInterval: time.Duration(15) * time.Second, RetryMaxAttempts: 3, }, Meta: map[string]string{ @@ -114,7 +114,7 @@ func TestConfig_Parse(t *testing.T) { ServerJoin: &ServerJoin{ RetryJoin: []string{"1.1.1.1", "2.2.2.2"}, StartJoin: []string{"1.1.1.1", "2.2.2.2"}, - RetryInterval: "15s", + RetryInterval: time.Duration(15) * time.Second, RetryMaxAttempts: 3, }, }, diff --git a/command/agent/config_test.go b/command/agent/config_test.go index 33ccd8d26a4..6c3337d44ba 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -916,13 +916,13 @@ func TestMergeServerJoin(t *testing.T) { retryJoin := []string{"127.0.0.1", "127.0.0.2"} startJoin := []string{"127.0.0.1", "127.0.0.2"} retryMaxAttempts := 1 - retryInterval := "1" + retryInterval := time.Duration(0) a := &ServerJoin{ RetryJoin: retryJoin, StartJoin: startJoin, RetryMaxAttempts: retryMaxAttempts, - RetryInterval: retryInterval, + RetryInterval: time.Duration(retryInterval), } b := &ServerJoin{} @@ -936,14 +936,14 @@ func TestMergeServerJoin(t *testing.T) { retryJoin := []string{"127.0.0.1", "127.0.0.2"} startJoin := []string{"127.0.0.1", "127.0.0.2"} retryMaxAttempts := 1 - 
retryInterval := "1" + retryInterval := time.Duration(0) a := &ServerJoin{} b := &ServerJoin{ RetryJoin: retryJoin, StartJoin: startJoin, RetryMaxAttempts: retryMaxAttempts, - RetryInterval: retryInterval, + RetryInterval: time.Duration(retryInterval), } a.Merge(b) @@ -956,7 +956,7 @@ func TestMergeServerJoin(t *testing.T) { retryJoin := []string{"127.0.0.1", "127.0.0.2"} startJoin := []string{"127.0.0.1", "127.0.0.2"} retryMaxAttempts := 1 - retryInterval := "1" + retryInterval := time.Duration(0) a := &ServerJoin{ RetryJoin: retryJoin, @@ -964,7 +964,7 @@ func TestMergeServerJoin(t *testing.T) { } b := &ServerJoin{ RetryMaxAttempts: retryMaxAttempts, - RetryInterval: retryInterval, + RetryInterval: time.Duration(retryInterval), } a.Merge(b) diff --git a/command/agent/retry_join.go b/command/agent/retry_join.go index 315fe52650a..3600ba7b488 100644 --- a/command/agent/retry_join.go +++ b/command/agent/retry_join.go @@ -82,33 +82,6 @@ func (r *retryJoiner) Validate(config *Config) error { } } - if config.Server != nil { - dur, err := time.ParseDuration(config.Server.RetryInterval) - if err != nil { - return fmt.Errorf("Error parsing server retry interval: %s", err) - } else { - config.Server.retryInterval = dur - } - - if config.Server.ServerJoin != nil { - dur, err := time.ParseDuration(config.Server.RetryInterval) - if err != nil { - return fmt.Errorf("Error parsing server retry interval: %s", err) - } else { - config.Server.ServerJoin.retryInterval = dur - } - } - } - - if config.Client != nil && config.Client.ServerJoin != nil { - dur, err := time.ParseDuration(config.Client.ServerJoin.RetryInterval) - if err != nil { - return fmt.Errorf("Error parsing retry interval: %s", err) - } else { - config.Client.ServerJoin.retryInterval = dur - } - } - return nil } @@ -170,6 +143,6 @@ func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) { r.logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err, serverJoin.RetryInterval) } - 
time.Sleep(serverJoin.retryInterval) + time.Sleep(serverJoin.RetryInterval) } } diff --git a/command/agent/retry_join_test.go b/command/agent/retry_join_test.go index a7e129aca1f..4f875772f31 100644 --- a/command/agent/retry_join_test.go +++ b/command/agent/retry_join_test.go @@ -6,6 +6,7 @@ import ( "log" "os" "testing" + "time" "github.com/hashicorp/nomad/testutil" "github.com/hashicorp/nomad/version" @@ -219,7 +220,7 @@ func TestRetryJoin_Validate(t *testing.T) { ServerJoin: &ServerJoin{ RetryJoin: []string{"127.0.0.1"}, RetryMaxAttempts: 0, - RetryInterval: "0", + RetryInterval: 0, StartJoin: []string{}, }, RetryJoin: []string{"127.0.0.1"}, @@ -237,7 +238,7 @@ func TestRetryJoin_Validate(t *testing.T) { ServerJoin: &ServerJoin{ RetryJoin: []string{"127.0.0.1"}, RetryMaxAttempts: 0, - RetryInterval: "0", + RetryInterval: 0, StartJoin: []string{}, }, StartJoin: []string{"127.0.0.1"}, @@ -255,7 +256,7 @@ func TestRetryJoin_Validate(t *testing.T) { ServerJoin: &ServerJoin{ RetryJoin: []string{"127.0.0.1"}, RetryMaxAttempts: 0, - RetryInterval: "0", + RetryInterval: 0, StartJoin: []string{}, }, StartJoin: []string{}, @@ -273,12 +274,13 @@ func TestRetryJoin_Validate(t *testing.T) { ServerJoin: &ServerJoin{ RetryJoin: []string{"127.0.0.1"}, RetryMaxAttempts: 0, - RetryInterval: "0", + RetryInterval: time.Duration(1), StartJoin: []string{}, }, StartJoin: []string{}, RetryMaxAttempts: 0, - RetryInterval: "1", + RetryInterval: "3s", + retryInterval: time.Duration(3), RetryJoin: []string{}, }, }, @@ -291,7 +293,7 @@ func TestRetryJoin_Validate(t *testing.T) { ServerJoin: &ServerJoin{ RetryJoin: []string{"127.0.0.1"}, RetryMaxAttempts: 0, - RetryInterval: "0", + RetryInterval: 0, StartJoin: []string{"127.0.0.1"}, }, }, @@ -305,7 +307,7 @@ func TestRetryJoin_Validate(t *testing.T) { ServerJoin: &ServerJoin{ RetryJoin: []string{}, RetryMaxAttempts: 0, - RetryInterval: "0", + RetryInterval: 0, StartJoin: []string{"127.0.0.1"}, }, }, @@ -319,7 +321,7 @@ func 
TestRetryJoin_Validate(t *testing.T) { ServerJoin: &ServerJoin{ RetryJoin: []string{"127.0.0.1"}, RetryMaxAttempts: 0, - RetryInterval: "0", + RetryInterval: 0, }, }, }, @@ -332,7 +334,7 @@ func TestRetryJoin_Validate(t *testing.T) { ServerJoin: &ServerJoin{ RetryJoin: []string{"127.0.0.1"}, RetryMaxAttempts: 0, - RetryInterval: "0", + RetryInterval: 0, StartJoin: []string{}, }, StartJoin: []string{}, @@ -349,53 +351,13 @@ func TestRetryJoin_Validate(t *testing.T) { Server: &ServerConfig{ StartJoin: []string{"127.0.0.1"}, RetryMaxAttempts: 1, - RetryInterval: "3s", + RetryInterval: "0", RetryJoin: []string{}, }, }, isValid: true, reason: "server deprecated retry_join configuration should be valid", }, - { - config: &Config{ - Server: &ServerConfig{ - StartJoin: []string{"127.0.0.1"}, - RetryMaxAttempts: 1, - RetryInterval: "invalid!TimeInterval", - RetryJoin: []string{}, - }, - }, - isValid: false, - reason: "invalid time interval", - }, - { - config: &Config{ - Server: &ServerConfig{ - ServerJoin: &ServerJoin{ - StartJoin: []string{"127.0.0.1"}, - RetryMaxAttempts: 1, - RetryInterval: "invalid!TimeInterval", - RetryJoin: []string{}, - }, - }, - }, - isValid: false, - reason: "invalid time interval", - }, - { - config: &Config{ - Client: &ClientConfig{ - ServerJoin: &ServerJoin{ - StartJoin: []string{"127.0.0.1"}, - RetryMaxAttempts: 1, - RetryInterval: "invalid!TimeInterval", - RetryJoin: []string{}, - }, - }, - }, - isValid: false, - reason: "invalid time interval", - }, } joiner := retryJoiner{} From 94cf42ec812ae15947c5462c90c93d77d0b7b051 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Fri, 25 May 2018 18:43:33 -0400 Subject: [PATCH 13/21] update config parse test documentation fixes --- command/agent/config-test-fixtures/basic.hcl | 9 ++++----- command/agent/config.go | 3 ++- command/agent/config_parse_test.go | 1 - command/agent/retry_join.go | 2 +- website/source/docs/agent/configuration/client.html.md | 5 ++++- 
website/source/docs/agent/configuration/server.html.md | 4 +++- .../source/docs/agent/configuration/server_join.html.md | 2 +- 7 files changed, 15 insertions(+), 11 deletions(-) diff --git a/command/agent/config-test-fixtures/basic.hcl b/command/agent/config-test-fixtures/basic.hcl index 264cf819696..c4eedbf69c8 100644 --- a/command/agent/config-test-fixtures/basic.hcl +++ b/command/agent/config-test-fixtures/basic.hcl @@ -32,8 +32,8 @@ client { } server_join { retry_join = [ "1.1.1.1", "2.2.2.2" ] - retry_max = 3 - retry_interval = "15s" + retry_max = 3 + retry_interval = "15s" } options { @@ -95,9 +95,8 @@ server { encrypt = "abc" server_join { retry_join = [ "1.1.1.1", "2.2.2.2" ] - start_join = [ "1.1.1.1", "2.2.2.2" ] - retry_max = 3 - retry_interval = "15s" + retry_max = 3 + retry_interval = "15s" } } acl { diff --git a/command/agent/config.go b/command/agent/config.go index 0398b5004ec..376ce7b8df6 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -366,7 +366,8 @@ type ServerJoin struct { // addresses, then the agent will error and exit. StartJoin []string `mapstructure:"start_join"` - // RetryJoin is a list of addresses to join with retry enabled. + // RetryJoin is a list of addresses to join with retry enabled, or a single + // value to find multiple servers using go-discover syntax. 
RetryJoin []string `mapstructure:"retry_join"` // RetryMaxAttempts specifies the maximum number of times to retry joining a diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index f1a2175ee2f..866ed6cc590 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -113,7 +113,6 @@ func TestConfig_Parse(t *testing.T) { EncryptKey: "abc", ServerJoin: &ServerJoin{ RetryJoin: []string{"1.1.1.1", "2.2.2.2"}, - StartJoin: []string{"1.1.1.1", "2.2.2.2"}, RetryInterval: time.Duration(15) * time.Second, RetryMaxAttempts: 3, }, diff --git a/command/agent/retry_join.go b/command/agent/retry_join.go index 3600ba7b488..66b5851df85 100644 --- a/command/agent/retry_join.go +++ b/command/agent/retry_join.go @@ -70,7 +70,7 @@ func (r *retryJoiner) Validate(config *Config) error { return fmt.Errorf("server_join and retry_max cannot both be defined; try defining only server_join") } if config.Server.RetryInterval != "0" { - return fmt.Errorf("server_join and retry_interval cannot both be defined; try defining only server_join") + return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join parameter") } } diff --git a/website/source/docs/agent/configuration/client.html.md b/website/source/docs/agent/configuration/client.html.md index 4cb00e6c0b1..7cee5601a59 100644 --- a/website/source/docs/agent/configuration/client.html.md +++ b/website/source/docs/agent/configuration/client.html.md @@ -91,7 +91,10 @@ client { the port. If the port is omitted, the default port of `4647` is used. - `server_join` ([server_join](#server-join): nil) - Specifies - configuration which is specific to retry joining Nomad servers. + how the Nomad client will connect to Nomad servers. The `start_join` field + is not supported on the client. The retry_join fields may directly specify + the server address or use go-discover syntax for auto-discovery. See the + documentation for more detail. 
- `state_dir` `(string: "[data_dir]/client")` - Specifies the directory to use to store client state. By default, this is - the top-level diff --git a/website/source/docs/agent/configuration/server.html.md b/website/source/docs/agent/configuration/server.html.md index ede0a1677f2..d1919872d11 100644 --- a/website/source/docs/agent/configuration/server.html.md +++ b/website/source/docs/agent/configuration/server.html.md @@ -136,7 +136,9 @@ server { rejoin the cluster. - `server_join` ([server_join](#server-join): nil) - Specifies - configuration for retry joining Nomad servers if the first attempt fails. + how the Nomad client will connect to Nomad servers. The retry_join fields may + directly specify the server address or use go-discover syntax for + auto-discovery. See the documentation for more detail. - `upgrade_version` `(string: "")` - A custom version of the format X.Y.Z to use in place of the Nomad version when custom upgrades are enabled in Autopilot. diff --git a/website/source/docs/agent/configuration/server_join.html.md b/website/source/docs/agent/configuration/server_join.html.md index eb65f62fdc2..4fa823e2356 100644 --- a/website/source/docs/agent/configuration/server_join.html.md +++ b/website/source/docs/agent/configuration/server_join.html.md @@ -107,7 +107,7 @@ nomad-01.company.local => nomad-01.company.local:4648 #### Via the go-discover interface -As of Nomad 0.8.4, `retry-join` accepts a unified interface using the +As of Nomad 0.8.4, `retry_join` accepts a unified interface using the [go-discover](https://github.com/hashicorp/go-discover) library for doing automated cluster joining using cloud metadata. 
From 236ac6592e16a38273f89fcdeb402525f9287e41 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Tue, 29 May 2018 11:25:09 -0400 Subject: [PATCH 14/21] add stronger protections for nil pointers in server join merge --- command/agent/config.go | 24 +++++++++---- command/agent/config_test.go | 70 ++++++++++++++++++++++++++++-------- 2 files changed, 73 insertions(+), 21 deletions(-) diff --git a/command/agent/config.go b/command/agent/config.go index 376ce7b8df6..1bc21bb2969 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -381,19 +381,31 @@ type ServerJoin struct { RetryInterval time.Duration `mapstructure:"retry_interval"` } -func (s *ServerJoin) Merge(b *ServerJoin) { +func (s *ServerJoin) Merge(b *ServerJoin) *ServerJoin { + if s == nil { + return b + } + + result := *s + + if b == nil { + return &result + } + if len(b.StartJoin) != 0 { - s.StartJoin = b.StartJoin + result.StartJoin = b.StartJoin } if len(b.RetryJoin) != 0 { - s.RetryJoin = b.RetryJoin + result.RetryJoin = b.RetryJoin } if b.RetryMaxAttempts != 0 { - s.RetryMaxAttempts = b.RetryMaxAttempts + result.RetryMaxAttempts = b.RetryMaxAttempts } if b.RetryInterval != 0 { - s.RetryInterval = b.RetryInterval + result.RetryInterval = b.RetryInterval } + + return &result } // EncryptBytes returns the encryption key configured. 
@@ -1104,7 +1116,7 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { result.EncryptKey = b.EncryptKey } if b.ServerJoin != nil { - result.ServerJoin.Merge(b.ServerJoin) + result.ServerJoin = result.ServerJoin.Merge(b.ServerJoin) } // Add the schedulers diff --git a/command/agent/config_test.go b/command/agent/config_test.go index 6c3337d44ba..c85ad2c0384 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -926,11 +926,11 @@ func TestMergeServerJoin(t *testing.T) { } b := &ServerJoin{} - a.Merge(b) - require.Equal(a.RetryJoin, retryJoin) - require.Equal(a.StartJoin, startJoin) - require.Equal(a.RetryMaxAttempts, retryMaxAttempts) - require.Equal(a.RetryInterval, retryInterval) + result := a.Merge(b) + require.Equal(result.RetryJoin, retryJoin) + require.Equal(result.StartJoin, startJoin) + require.Equal(result.RetryMaxAttempts, retryMaxAttempts) + require.Equal(result.RetryInterval, retryInterval) } { retryJoin := []string{"127.0.0.1", "127.0.0.2"} @@ -946,11 +946,51 @@ func TestMergeServerJoin(t *testing.T) { RetryInterval: time.Duration(retryInterval), } - a.Merge(b) - require.Equal(a.RetryJoin, retryJoin) - require.Equal(a.StartJoin, startJoin) - require.Equal(a.RetryMaxAttempts, retryMaxAttempts) - require.Equal(a.RetryInterval, retryInterval) + result := a.Merge(b) + require.Equal(result.RetryJoin, retryJoin) + require.Equal(result.StartJoin, startJoin) + require.Equal(result.RetryMaxAttempts, retryMaxAttempts) + require.Equal(result.RetryInterval, retryInterval) + } + { + retryJoin := []string{"127.0.0.1", "127.0.0.2"} + startJoin := []string{"127.0.0.1", "127.0.0.2"} + retryMaxAttempts := 1 + retryInterval := time.Duration(0) + + var a *ServerJoin + b := &ServerJoin{ + RetryJoin: retryJoin, + StartJoin: startJoin, + RetryMaxAttempts: retryMaxAttempts, + RetryInterval: time.Duration(retryInterval), + } + + result := a.Merge(b) + require.Equal(result.RetryJoin, retryJoin) + require.Equal(result.StartJoin, startJoin) + 
require.Equal(result.RetryMaxAttempts, retryMaxAttempts) + require.Equal(result.RetryInterval, retryInterval) + } + { + retryJoin := []string{"127.0.0.1", "127.0.0.2"} + startJoin := []string{"127.0.0.1", "127.0.0.2"} + retryMaxAttempts := 1 + retryInterval := time.Duration(0) + + a := &ServerJoin{ + RetryJoin: retryJoin, + StartJoin: startJoin, + RetryMaxAttempts: retryMaxAttempts, + RetryInterval: time.Duration(retryInterval), + } + var b *ServerJoin + + result := a.Merge(b) + require.Equal(result.RetryJoin, retryJoin) + require.Equal(result.StartJoin, startJoin) + require.Equal(result.RetryMaxAttempts, retryMaxAttempts) + require.Equal(result.RetryInterval, retryInterval) } { retryJoin := []string{"127.0.0.1", "127.0.0.2"} @@ -967,10 +1007,10 @@ func TestMergeServerJoin(t *testing.T) { RetryInterval: time.Duration(retryInterval), } - a.Merge(b) - require.Equal(a.RetryJoin, retryJoin) - require.Equal(a.StartJoin, startJoin) - require.Equal(a.RetryMaxAttempts, retryMaxAttempts) - require.Equal(a.RetryInterval, retryInterval) + result := a.Merge(b) + require.Equal(result.RetryJoin, retryJoin) + require.Equal(result.StartJoin, startJoin) + require.Equal(result.RetryMaxAttempts, retryMaxAttempts) + require.Equal(result.RetryInterval, retryInterval) } } From 3d12b3fdf27da32f06fb6bc5e3d5a63b704d968d Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Tue, 29 May 2018 11:33:53 -0400 Subject: [PATCH 15/21] ignore default values for retry interval add additional validation case --- command/agent/retry_join.go | 11 +++++++++-- command/agent/retry_join_test.go | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/command/agent/retry_join.go b/command/agent/retry_join.go index 66b5851df85..c3e9bf4754a 100644 --- a/command/agent/retry_join.go +++ b/command/agent/retry_join.go @@ -69,8 +69,15 @@ func (r *retryJoiner) Validate(config *Config) error { if config.Server.RetryMaxAttempts != 0 { return fmt.Errorf("server_join and retry_max cannot 
both be defined; try defining only server_join") } - if config.Server.RetryInterval != "0" { - return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join parameter") + if config.Server.RetryInterval != "0" && config.Server.RetryInterval != "" { + // 30s is the default value that is set, ignore if this is the case + if config.Server.RetryInterval != "30s" { + return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join parameter") + } + } + + if len(config.Server.ServerJoin.RetryJoin) != 0 && len(config.Server.ServerJoin.StartJoin) != 0 { + return fmt.Errorf("server_join and start_join cannot both be defined in the same stanza") } } diff --git a/command/agent/retry_join_test.go b/command/agent/retry_join_test.go index 4f875772f31..d5479cfb06a 100644 --- a/command/agent/retry_join_test.go +++ b/command/agent/retry_join_test.go @@ -358,6 +358,21 @@ func TestRetryJoin_Validate(t *testing.T) { isValid: true, reason: "server deprecated retry_join configuration should be valid", }, + { + config: &Config{ + Server: &ServerConfig{ + RetryInterval: "30s", + ServerJoin: &ServerJoin{ + RetryJoin: []string{"127.0.0.1"}, + RetryMaxAttempts: 0, + RetryInterval: time.Duration(20) * time.Second, + StartJoin: []string{}, + }, + }, + }, + isValid: true, + reason: "ignore default value for retry interval", + }, } joiner := retryJoiner{} From 4b20a698ac703926751ebd4d63ec35ddea58ada0 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Tue, 29 May 2018 15:12:08 -0400 Subject: [PATCH 16/21] ensure default value of 30s is set for server_join stanza --- command/agent/command.go | 4 ++++ command/agent/config.go | 14 +++++++++++++- command/agent/retry_join.go | 7 ++----- command/agent/retry_join_test.go | 2 +- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index b9b85ed667a..3acd34ea858 100644 --- a/command/agent/command.go +++ 
b/command/agent/command.go @@ -591,6 +591,10 @@ func (c *Command) Run(args []string) int { } if config.Client.Enabled && config.Client.ServerJoin != nil { + // COMPAT: Remove in 0.10 set the default RetryInterval value, as the + // ServerJoin stanza is not part of a default config for an agent. + config.Client.ServerJoin.RetryInterval = time.Duration(30) * time.Second + joiner := retryJoiner{ discover: &discover.Discover{}, errCh: c.retryJoinErrCh, diff --git a/command/agent/config.go b/command/agent/config.go index 1bc21bb2969..07e67b52626 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -1116,6 +1116,9 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { result.EncryptKey = b.EncryptKey } if b.ServerJoin != nil { + // // COMPAT: Remove in 0.10 - ServerJoin is not defined by default on an + // agent config, this should be eventually moved to DefaultConfig + result.ServerJoin = getDefaultServerJoin() result.ServerJoin = result.ServerJoin.Merge(b.ServerJoin) } @@ -1135,6 +1138,12 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { return &result } +func getDefaultServerJoin() *ServerJoin { + return &ServerJoin{ + RetryInterval: time.Duration(30) * time.Second, + } +} + // Merge is used to merge two client configs together func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig { result := *a @@ -1226,7 +1235,10 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig { } if b.ServerJoin != nil { - result.ServerJoin = b.ServerJoin + // // COMPAT: Remove in 0.10 - ServerJoin is not defined by default on an + // agent config, this should be eventually moved to DefaultConfig + result.ServerJoin = getDefaultServerJoin() + result.ServerJoin = result.ServerJoin.Merge(b.ServerJoin) } return &result diff --git a/command/agent/retry_join.go b/command/agent/retry_join.go index c3e9bf4754a..e49412d8c80 100644 --- a/command/agent/retry_join.go +++ b/command/agent/retry_join.go @@ -69,11 +69,8 @@ func (r *retryJoiner) 
Validate(config *Config) error { if config.Server.RetryMaxAttempts != 0 { return fmt.Errorf("server_join and retry_max cannot both be defined; try defining only server_join") } - if config.Server.RetryInterval != "0" && config.Server.RetryInterval != "" { - // 30s is the default value that is set, ignore if this is the case - if config.Server.RetryInterval != "30s" { - return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join parameter") - } + if config.Server.RetryInterval != "30s" { + return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join parameter") } if len(config.Server.ServerJoin.RetryJoin) != 0 && len(config.Server.ServerJoin.StartJoin) != 0 { diff --git a/command/agent/retry_join_test.go b/command/agent/retry_join_test.go index d5479cfb06a..f1eb7fd3541 100644 --- a/command/agent/retry_join_test.go +++ b/command/agent/retry_join_test.go @@ -339,7 +339,7 @@ func TestRetryJoin_Validate(t *testing.T) { }, StartJoin: []string{}, RetryMaxAttempts: 0, - RetryInterval: "0", + RetryInterval: "30s", RetryJoin: []string{}, }, }, From 907def09520ebdecee170364b8afe39c78830949 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 30 May 2018 13:42:56 -0700 Subject: [PATCH 17/21] indentation --- command/agent/config-test-fixtures/basic.hcl | 173 +++++++++---------- 1 file changed, 86 insertions(+), 87 deletions(-) diff --git a/command/agent/config-test-fixtures/basic.hcl b/command/agent/config-test-fixtures/basic.hcl index c4eedbf69c8..b5a3a77cd50 100644 --- a/command/agent/config-test-fixtures/basic.hcl +++ b/command/agent/config-test-fixtures/basic.hcl @@ -30,12 +30,11 @@ client { foo = "bar" baz = "zip" } - server_join { - retry_join = [ "1.1.1.1", "2.2.2.2" ] - retry_max = 3 - retry_interval = "15s" - } - + server_join { + retry_join = [ "1.1.1.1", "2.2.2.2" ] + retry_max = 3 + retry_interval = "15s" + } options { foo = "bar" baz = "zip" @@ -56,17 +55,17 @@ client { } 
client_min_port = 1000 client_max_port = 2000 - max_kill_timeout = "10s" - stats { - data_points = 35 - collection_interval = "5s" - } - gc_interval = "6s" - gc_parallel_destroys = 6 - gc_disk_usage_threshold = 82 - gc_inode_usage_threshold = 91 - gc_max_allocs = 50 - no_host_uuid = false + max_kill_timeout = "10s" + stats { + data_points = 35 + collection_interval = "5s" + } + gc_interval = "6s" + gc_parallel_destroys = 6 + gc_disk_usage_threshold = 82 + gc_inode_usage_threshold = 91 + gc_max_allocs = 50 + no_host_uuid = false } server { enabled = true @@ -93,28 +92,28 @@ server { redundancy_zone = "foo" upgrade_version = "0.8.0" encrypt = "abc" - server_join { - retry_join = [ "1.1.1.1", "2.2.2.2" ] - retry_max = 3 - retry_interval = "15s" - } + server_join { + retry_join = [ "1.1.1.1", "2.2.2.2" ] + retry_max = 3 + retry_interval = "15s" + } } acl { - enabled = true - token_ttl = "60s" - policy_ttl = "60s" - replication_token = "foobar" + enabled = true + token_ttl = "60s" + policy_ttl = "60s" + replication_token = "foobar" } telemetry { statsite_address = "127.0.0.1:1234" statsd_address = "127.0.0.1:2345" prometheus_metrics = true disable_hostname = true - collection_interval = "3s" - publish_allocation_metrics = true - publish_node_metrics = true - disable_tagged_metrics = true - backwards_compatible_metrics = true + collection_interval = "3s" + publish_allocation_metrics = true + publish_node_metrics = true + disable_tagged_metrics = true + backwards_compatible_metrics = true } leave_on_interrupt = true leave_on_terminate = true @@ -126,68 +125,68 @@ http_api_response_headers { Access-Control-Allow-Origin = "*" } consul { - server_service_name = "nomad" - server_http_check_name = "nomad-server-http-health-check" - server_serf_check_name = "nomad-server-serf-health-check" - server_rpc_check_name = "nomad-server-rpc-health-check" - client_service_name = "nomad-client" - client_http_check_name = "nomad-client-http-health-check" - address = "127.0.0.1:9500" - 
token = "token1" - auth = "username:pass" - ssl = true - verify_ssl = true - ca_file = "/path/to/ca/file" - cert_file = "/path/to/cert/file" - key_file = "/path/to/key/file" - server_auto_join = true - client_auto_join = true - auto_advertise = true - checks_use_advertise = true + server_service_name = "nomad" + server_http_check_name = "nomad-server-http-health-check" + server_serf_check_name = "nomad-server-serf-health-check" + server_rpc_check_name = "nomad-server-rpc-health-check" + client_service_name = "nomad-client" + client_http_check_name = "nomad-client-http-health-check" + address = "127.0.0.1:9500" + token = "token1" + auth = "username:pass" + ssl = true + verify_ssl = true + ca_file = "/path/to/ca/file" + cert_file = "/path/to/cert/file" + key_file = "/path/to/key/file" + server_auto_join = true + client_auto_join = true + auto_advertise = true + checks_use_advertise = true } vault { - address = "127.0.0.1:9500" - allow_unauthenticated = true - task_token_ttl = "1s" - enabled = false - token = "12345" - ca_file = "/path/to/ca/file" - ca_path = "/path/to/ca" - cert_file = "/path/to/cert/file" - key_file = "/path/to/key/file" - tls_server_name = "foobar" - tls_skip_verify = true - create_from_role = "test_role" + address = "127.0.0.1:9500" + allow_unauthenticated = true + task_token_ttl = "1s" + enabled = false + token = "12345" + ca_file = "/path/to/ca/file" + ca_path = "/path/to/ca" + cert_file = "/path/to/cert/file" + key_file = "/path/to/key/file" + tls_server_name = "foobar" + tls_skip_verify = true + create_from_role = "test_role" } tls { - http = true - rpc = true - verify_server_hostname = true - ca_file = "foo" - cert_file = "bar" - key_file = "pipe" - rpc_upgrade_mode = true - verify_https_client = true - tls_prefer_server_cipher_suites = true - tls_cipher_suites = "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256" - tls_min_version = "tls12" + http = true + rpc = true + verify_server_hostname = true + ca_file = "foo" + cert_file = "bar" + key_file = 
"pipe" + rpc_upgrade_mode = true + verify_https_client = true + tls_prefer_server_cipher_suites = true + tls_cipher_suites = "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256" + tls_min_version = "tls12" } sentinel { - import "foo" { - path = "foo" - args = ["a", "b", "c"] - } - import "bar" { - path = "bar" - args = ["x", "y", "z"] - } + import "foo" { + path = "foo" + args = ["a", "b", "c"] + } + import "bar" { + path = "bar" + args = ["x", "y", "z"] + } } autopilot { - cleanup_dead_servers = true - disable_upgrade_migration = true - last_contact_threshold = "12705s" - max_trailing_logs = 17849 - enable_redundancy_zones = true - server_stabilization_time = "23057s" - enable_custom_upgrades = true + cleanup_dead_servers = true + disable_upgrade_migration = true + last_contact_threshold = "12705s" + max_trailing_logs = 17849 + enable_redundancy_zones = true + server_stabilization_time = "23057s" + enable_custom_upgrades = true } From 13f8e91e35258d3bc4d881ff50fab3fbda3e9088 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 30 May 2018 13:49:36 -0700 Subject: [PATCH 18/21] validation errors --- command/agent/retry_join.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/command/agent/retry_join.go b/command/agent/retry_join.go index e49412d8c80..d2deee51457 100644 --- a/command/agent/retry_join.go +++ b/command/agent/retry_join.go @@ -61,20 +61,20 @@ func (r *retryJoiner) Validate(config *Config) error { // fields and the server_join stanza are not both set if config.Server != nil && config.Server.ServerJoin != nil { if len(config.Server.RetryJoin) != 0 { - return fmt.Errorf("server_join and retry_join cannot both be defined; try defining only server_join") + return fmt.Errorf("server_join and retry_join cannot both be defined; prefer setting the server_join stanza") } if len(config.Server.StartJoin) != 0 { - return fmt.Errorf("server_join and start_join cannot both be defined; try defining only server_join") + return fmt.Errorf("server_join and 
start_join cannot both be defined; prefer setting the server_join stanza") } if config.Server.RetryMaxAttempts != 0 { - return fmt.Errorf("server_join and retry_max cannot both be defined; try defining only server_join") + return fmt.Errorf("server_join and retry_max cannot both be defined; prefer setting the server_join stanza") } if config.Server.RetryInterval != "30s" { - return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join parameter") + return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join stanza") } if len(config.Server.ServerJoin.RetryJoin) != 0 && len(config.Server.ServerJoin.StartJoin) != 0 { - return fmt.Errorf("server_join and start_join cannot both be defined in the same stanza") + return fmt.Errorf("retry_join and start_join cannot both be defined") } } From 2dde49e2bfee40e23925e569be3fd9d713dde74e Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 30 May 2018 16:51:55 -0700 Subject: [PATCH 19/21] Improve validation/defaulting, handle start-join This commit: * Improves how we combine the old retry-* fields and the new stanza and how it is validated * Handles the new stanza setting start_join * Fixes integration test to not bind to the standard port and instead be randomized. 
* Simplifies parsing of the old retry_interval * Fixes the errors from retry join being masked * Flags get parsed into new server_join stanza --- command/agent/command.go | 112 +++++++++++++++++++---------- command/agent/config.go | 35 ++++----- command/agent/config_parse_test.go | 2 +- command/agent/config_test.go | 3 +- command/agent/retry_join.go | 14 ++-- command/agent/retry_join_test.go | 97 ++++++++----------------- 6 files changed, 128 insertions(+), 135 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index 3acd34ea858..9b9fb9ad436 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -63,9 +63,11 @@ func (c *Command) readConfig() *Config { Client: &ClientConfig{}, Consul: &config.ConsulConfig{}, Ports: &Ports{}, - Server: &ServerConfig{}, - Vault: &config.VaultConfig{}, - ACL: &ACLConfig{}, + Server: &ServerConfig{ + ServerJoin: &ServerJoin{}, + }, + Vault: &config.VaultConfig{}, + ACL: &ACLConfig{}, } flags := flag.NewFlagSet("agent", flag.ContinueOnError) @@ -78,13 +80,16 @@ func (c *Command) readConfig() *Config { // Server-only options flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "") - flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "") - flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.StartJoin), "join", "") - flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.RetryJoin), "retry-join", "") - flags.IntVar(&cmdConfig.Server.RetryMaxAttempts, "retry-max", 0, "") - flags.StringVar(&cmdConfig.Server.RetryInterval, "retry-interval", "", "") flags.StringVar(&cmdConfig.Server.EncryptKey, "encrypt", "", "gossip encryption key") flags.IntVar(&cmdConfig.Server.RaftProtocol, "raft-protocol", 0, "") + flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "") + flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.ServerJoin.StartJoin), "join", "") + flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.ServerJoin.RetryJoin), "retry-join", "") + 
flags.IntVar(&cmdConfig.Server.ServerJoin.RetryMaxAttempts, "retry-max", 0, "") + flags.Var((flaghelper.FuncDurationVar)(func(d time.Duration) error { + cmdConfig.Server.ServerJoin.RetryInterval = d + return nil + }), "retry-interval", "") // Client-only options flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "") @@ -267,14 +272,6 @@ func (c *Command) readConfig() *Config { } } - // COMPAT: Remove in 0.10. Parse the RetryInterval - dur, err := time.ParseDuration(config.Server.RetryInterval) - if err != nil { - c.Ui.Error(fmt.Sprintf("Error parsing retry interval: %s", err)) - return nil - } - config.Server.retryInterval = dur - // Check that the server is running in at least one mode. if !(config.Server.Enabled || config.Client.Enabled) { c.Ui.Error("Must specify either server, client or dev mode for the agent.") @@ -547,6 +544,17 @@ func (c *Command) Run(args []string) int { logGate.Flush() // Start retry join process + if err := c.handleRetryJoin(config); err != nil { + c.Ui.Error(err.Error()) + return 1 + } + + // Wait for exit + return c.handleSignals() +} + +// handleRetryJoin is used to start retry joining if it is configured. 
+func (c *Command) handleRetryJoin(config *Config) error { c.retryJoinErrCh = make(chan struct{}) if config.Server.Enabled && len(config.Server.RetryJoin) != 0 { @@ -559,21 +567,30 @@ func (c *Command) Run(args []string) int { } if err := joiner.Validate(config); err != nil { - c.Ui.Error(err.Error()) - return 1 + return err } - // COMPAT: Remove in 0.10 and only use ServerJoin - serverJoinInfo := &ServerJoin{ - RetryJoin: config.Server.RetryJoin, - StartJoin: config.Server.StartJoin, - RetryMaxAttempts: config.Server.RetryMaxAttempts, - RetryInterval: config.Server.retryInterval, + // Remove the duplicate fields + if len(config.Server.RetryJoin) != 0 { + config.Server.ServerJoin.RetryJoin = config.Server.RetryJoin + config.Server.RetryJoin = nil + } + if config.Server.RetryMaxAttempts != 0 { + config.Server.ServerJoin.RetryMaxAttempts = config.Server.RetryMaxAttempts + config.Server.RetryMaxAttempts = 0 } - go joiner.RetryJoin(serverJoinInfo) + if config.Server.RetryInterval != 0 { + config.Server.ServerJoin.RetryInterval = config.Server.RetryInterval + config.Server.RetryInterval = 0 + } + + c.agent.logger.Printf("[WARN] agent: Using deprecated retry_join fields. Upgrade configuration to use server_join") } - if config.Server.Enabled && config.Server.ServerJoin != nil { + if config.Server.Enabled && + config.Server.ServerJoin != nil && + len(config.Server.ServerJoin.RetryJoin) != 0 { + joiner := retryJoiner{ discover: &discover.Discover{}, errCh: c.retryJoinErrCh, @@ -583,18 +600,15 @@ func (c *Command) Run(args []string) int { } if err := joiner.Validate(config); err != nil { - c.Ui.Error(err.Error()) - return 1 + return err } go joiner.RetryJoin(config.Server.ServerJoin) } - if config.Client.Enabled && config.Client.ServerJoin != nil { - // COMPAT: Remove in 0.10 set the default RetryInterval value, as the - // ServerJoin stanza is not part of a default config for an agent. 
- config.Client.ServerJoin.RetryInterval = time.Duration(30) * time.Second - + if config.Client.Enabled && + config.Client.ServerJoin != nil && + len(config.Client.ServerJoin.RetryJoin) != 0 { joiner := retryJoiner{ discover: &discover.Discover{}, errCh: c.retryJoinErrCh, @@ -604,15 +618,13 @@ func (c *Command) Run(args []string) int { } if err := joiner.Validate(config); err != nil { - c.Ui.Error(err.Error()) - return 1 + return err } go joiner.RetryJoin(config.Client.ServerJoin) } - // Wait for exit - return c.handleSignals() + return nil } // handleSignals blocks until we get an exit-causing signal @@ -885,12 +897,34 @@ func (c *Command) setupTelemetry(config *Config) (*metrics.InmemSink, error) { } func (c *Command) startupJoin(config *Config) error { - if len(config.Server.StartJoin) == 0 || !config.Server.Enabled { + // Nothing to do + if !config.Server.Enabled { return nil } + // Validate both old and new aren't being set + old := len(config.Server.StartJoin) + var new int + if config.Server.ServerJoin != nil { + new = len(config.Server.ServerJoin.StartJoin) + } + if old != 0 && new != 0 { + return fmt.Errorf("server_join and start_join cannot both be defined; prefer setting the server_join stanza") + } + + // Nothing to do + if old+new == 0 { + return nil + } + + // Combine the lists and join + joining := config.Server.StartJoin + if new != 0 { + joining = append(joining, config.Server.ServerJoin.StartJoin...) + } + c.Ui.Output("Joining cluster...") - n, err := c.agent.server.Join(config.Server.StartJoin) + n, err := c.agent.server.Join(joining) if err != nil { return err } diff --git a/command/agent/config.go b/command/agent/config.go index 07e67b52626..301d31b3cb8 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -331,8 +331,7 @@ type ServerConfig struct { // attempts on agent start. The minimum allowed value is 1 second and // the default is 30s. 
// Deprecated in Nomad 0.10 - RetryInterval string `mapstructure:"retry_interval"` - retryInterval time.Duration `mapstructure:"-"` + RetryInterval time.Duration `mapstructure:"retry_interval"` // RejoinAfterLeave controls our interaction with the cluster after leave. // When set to false (default), a leave causes Consul to not rejoin @@ -661,13 +660,20 @@ func DefaultConfig() *Config { GCInodeUsageThreshold: 70, GCMaxAllocs: 50, NoHostUUID: helper.BoolToPtr(true), + ServerJoin: &ServerJoin{ + RetryJoin: []string{}, + RetryInterval: 30 * time.Second, + RetryMaxAttempts: 0, + }, }, Server: &ServerConfig{ - Enabled: false, - StartJoin: []string{}, - RetryJoin: []string{}, - RetryInterval: "30s", - RetryMaxAttempts: 0, + Enabled: false, + StartJoin: []string{}, + ServerJoin: &ServerJoin{ + RetryJoin: []string{}, + RetryInterval: 30 * time.Second, + RetryMaxAttempts: 0, + }, }, ACL: &ACLConfig{ Enabled: false, @@ -1096,9 +1102,8 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { if b.RetryMaxAttempts != 0 { result.RetryMaxAttempts = b.RetryMaxAttempts } - if b.RetryInterval != "" { + if b.RetryInterval != 0 { result.RetryInterval = b.RetryInterval - result.retryInterval = b.retryInterval } if b.RejoinAfterLeave { result.RejoinAfterLeave = true @@ -1116,9 +1121,6 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { result.EncryptKey = b.EncryptKey } if b.ServerJoin != nil { - // // COMPAT: Remove in 0.10 - ServerJoin is not defined by default on an - // agent config, this should be eventually moved to DefaultConfig - result.ServerJoin = getDefaultServerJoin() result.ServerJoin = result.ServerJoin.Merge(b.ServerJoin) } @@ -1138,12 +1140,6 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { return &result } -func getDefaultServerJoin() *ServerJoin { - return &ServerJoin{ - RetryInterval: time.Duration(30) * time.Second, - } -} - // Merge is used to merge two client configs together func (a *ClientConfig) Merge(b *ClientConfig) 
*ClientConfig { result := *a @@ -1235,9 +1231,6 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig { } if b.ServerJoin != nil { - // // COMPAT: Remove in 0.10 - ServerJoin is not defined by default on an - // agent config, this should be eventually moved to DefaultConfig - result.ServerJoin = getDefaultServerJoin() result.ServerJoin = result.ServerJoin.Merge(b.ServerJoin) } diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index 866ed6cc590..fe8c5c6855b 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -104,7 +104,7 @@ func TestConfig_Parse(t *testing.T) { MaxHeartbeatsPerSecond: 11.0, RetryJoin: []string{"1.1.1.1", "2.2.2.2"}, StartJoin: []string{"1.1.1.1", "2.2.2.2"}, - RetryInterval: "15s", + RetryInterval: 15 * time.Second, RejoinAfterLeave: true, RetryMaxAttempts: 3, NonVotingServer: true, diff --git a/command/agent/config_test.go b/command/agent/config_test.go index c85ad2c0384..d756912094f 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -265,8 +265,7 @@ func TestConfig_Merge(t *testing.T) { RejoinAfterLeave: true, StartJoin: []string{"1.1.1.1"}, RetryJoin: []string{"1.1.1.1"}, - RetryInterval: "10s", - retryInterval: time.Second * 10, + RetryInterval: time.Second * 10, NonVotingServer: true, RedundancyZone: "bar", UpgradeVersion: "bar", diff --git a/command/agent/retry_join.go b/command/agent/retry_join.go index d2deee51457..2e8735be1d0 100644 --- a/command/agent/retry_join.go +++ b/command/agent/retry_join.go @@ -59,7 +59,7 @@ func (r *retryJoiner) Validate(config *Config) error { // If retry_join is defined for the server, ensure that deprecated // fields and the server_join stanza are not both set - if config.Server != nil && config.Server.ServerJoin != nil { + if config.Server != nil && config.Server.ServerJoin != nil && len(config.Server.ServerJoin.RetryJoin) != 0 { if len(config.Server.RetryJoin) != 0 { return fmt.Errorf("server_join 
and retry_join cannot both be defined; prefer setting the server_join stanza") } @@ -69,11 +69,12 @@ func (r *retryJoiner) Validate(config *Config) error { if config.Server.RetryMaxAttempts != 0 { return fmt.Errorf("server_join and retry_max cannot both be defined; prefer setting the server_join stanza") } - if config.Server.RetryInterval != "30s" { + + if config.Server.RetryInterval != 0 { return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join stanza") } - if len(config.Server.ServerJoin.RetryJoin) != 0 && len(config.Server.ServerJoin.StartJoin) != 0 { + if len(config.Server.ServerJoin.StartJoin) != 0 { return fmt.Errorf("retry_join and start_join cannot both be defined") } } @@ -103,6 +104,7 @@ func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) { for { var addrs []string + var n int var err error for _, addr := range serverJoin.RetryJoin { @@ -121,14 +123,14 @@ func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) { if len(addrs) > 0 { if r.serverEnabled && r.serverJoin != nil { - n, err := r.serverJoin(addrs) + n, err = r.serverJoin(addrs) if err == nil { r.logger.Printf("[INFO] agent: Join completed. Server synced with %d initial servers", n) return } } if r.clientEnabled && r.clientJoin != nil { - n, err := r.clientJoin(addrs) + n, err = r.clientJoin(addrs) if err == nil { r.logger.Printf("[INFO] agent: Join completed. 
Client synced with %d initial servers", n) return @@ -144,7 +146,7 @@ func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) { } if err != nil { - r.logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err, + r.logger.Printf("[WARN] agent: Join failed: %q, retrying in %v", err, serverJoin.RetryInterval) } time.Sleep(serverJoin.RetryInterval) diff --git a/command/agent/retry_join_test.go b/command/agent/retry_join_test.go index f1eb7fd3541..b07848d2c9c 100644 --- a/command/agent/retry_join_test.go +++ b/command/agent/retry_join_test.go @@ -9,7 +9,6 @@ import ( "time" "github.com/hashicorp/nomad/testutil" - "github.com/hashicorp/nomad/version" "github.com/mitchellh/cli" "github.com/stretchr/testify/require" ) @@ -31,43 +30,37 @@ func (m *MockDiscover) Names() []string { func TestRetryJoin_Integration(t *testing.T) { t.Parallel() + + // Create two agents and have one retry join the other agent := NewTestAgent(t, t.Name(), nil) defer agent.Shutdown() - doneCh := make(chan struct{}) - shutdownCh := make(chan struct{}) - - defer func() { - close(shutdownCh) - <-doneCh - }() + agent2 := NewTestAgent(t, t.Name(), func(c *Config) { + c.NodeName = "foo" + if c.Server.ServerJoin == nil { + c.Server.ServerJoin = &ServerJoin{} + } + c.Server.ServerJoin.RetryJoin = []string{agent.Config.normalizedAddrs.Serf} + c.Server.ServerJoin.RetryInterval = 1 * time.Second + }) + defer agent2.Shutdown() + // Create a fake command and have it wrap the second agent and run the retry + // join handler cmd := &Command{ - Version: version.GetVersion(), - ShutdownCh: shutdownCh, Ui: &cli.BasicUi{ Reader: os.Stdin, Writer: os.Stdout, ErrorWriter: os.Stderr, }, + agent: agent2.Agent, } - serfAddr := agent.Config.normalizedAddrs.Serf - - args := []string{ - "-dev", - "-node", "foo", - "-retry-join", serfAddr, - "-retry-interval", "1s", + if err := cmd.handleRetryJoin(agent2.Config); err != nil { + t.Fatalf("handleRetryJoin failed: %v", err) } - go func() { - if code := cmd.Run(args); code 
!= 0 { - t.Logf("bad: %d", code) - } - close(doneCh) - }() - + // Ensure the retry join occurred. testutil.WaitForResult(func() (bool, error) { mem := agent.server.Members() if len(mem) != 2 { @@ -205,8 +198,6 @@ func TestRetryJoin_Client(t *testing.T) { func TestRetryJoin_Validate(t *testing.T) { t.Parallel() - require := require.New(t) - type validateExpect struct { config *Config isValid bool @@ -225,7 +216,7 @@ func TestRetryJoin_Validate(t *testing.T) { }, RetryJoin: []string{"127.0.0.1"}, RetryMaxAttempts: 0, - RetryInterval: "0", + RetryInterval: 0, StartJoin: []string{}, }, }, @@ -243,7 +234,7 @@ func TestRetryJoin_Validate(t *testing.T) { }, StartJoin: []string{"127.0.0.1"}, RetryMaxAttempts: 0, - RetryInterval: "0", + RetryInterval: 0, RetryJoin: []string{}, }, }, @@ -261,7 +252,7 @@ func TestRetryJoin_Validate(t *testing.T) { }, StartJoin: []string{}, RetryMaxAttempts: 1, - RetryInterval: "0", + RetryInterval: 0, RetryJoin: []string{}, }, }, @@ -279,8 +270,7 @@ func TestRetryJoin_Validate(t *testing.T) { }, StartJoin: []string{}, RetryMaxAttempts: 0, - RetryInterval: "3s", - retryInterval: time.Duration(3), + RetryInterval: 3 * time.Second, RetryJoin: []string{}, }, }, @@ -333,51 +323,26 @@ func TestRetryJoin_Validate(t *testing.T) { Server: &ServerConfig{ ServerJoin: &ServerJoin{ RetryJoin: []string{"127.0.0.1"}, - RetryMaxAttempts: 0, - RetryInterval: 0, + RetryMaxAttempts: 1, + RetryInterval: 1, StartJoin: []string{}, }, - StartJoin: []string{}, - RetryMaxAttempts: 0, - RetryInterval: "30s", - RetryJoin: []string{}, }, }, isValid: true, reason: "server server_join should be valid", }, - { - config: &Config{ - Server: &ServerConfig{ - StartJoin: []string{"127.0.0.1"}, - RetryMaxAttempts: 1, - RetryInterval: "0", - RetryJoin: []string{}, - }, - }, - isValid: true, - reason: "server deprecated retry_join configuration should be valid", - }, - { - config: &Config{ - Server: &ServerConfig{ - RetryInterval: "30s", - ServerJoin: &ServerJoin{ - RetryJoin: 
[]string{"127.0.0.1"}, - RetryMaxAttempts: 0, - RetryInterval: time.Duration(20) * time.Second, - StartJoin: []string{}, - }, - }, - }, - isValid: true, - reason: "ignore default value for retry interval", - }, } joiner := retryJoiner{} for _, scenario := range scenarios { - err := joiner.Validate(scenario.config) - require.Equal(err == nil, scenario.isValid, scenario.reason) + t.Run(scenario.reason, func(t *testing.T) { + err := joiner.Validate(scenario.config) + if scenario.isValid { + require.NoError(t, err) + } else { + require.Error(t, err) + } + }) } } From 92096d6e62357466c49da037ff86fb462f05bbb4 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 31 May 2018 10:49:19 -0700 Subject: [PATCH 20/21] Fix website --- ...o-join.html.md => cloud_auto_join.html.md} | 4 +- .../docs/agent/configuration/client.html.md | 2 +- .../docs/agent/configuration/server.html.md | 30 ++++---- .../agent/configuration/server_join.html.md | 75 +++++++++++-------- .../source/docs/commands/agent.html.md.erb | 13 ++-- website/source/layouts/docs.erb | 11 +-- 6 files changed, 76 insertions(+), 59 deletions(-) rename website/source/docs/agent/{configuration/cloud-auto-join.html.md => cloud_auto_join.html.md} (97%) diff --git a/website/source/docs/agent/configuration/cloud-auto-join.html.md b/website/source/docs/agent/cloud_auto_join.html.md similarity index 97% rename from website/source/docs/agent/configuration/cloud-auto-join.html.md rename to website/source/docs/agent/cloud_auto_join.html.md index 71c9bc847d4..d733fe7066e 100644 --- a/website/source/docs/agent/configuration/cloud-auto-join.html.md +++ b/website/source/docs/agent/cloud_auto_join.html.md @@ -8,7 +8,9 @@ description: |- # Cloud Auto-joining -As of Nomad 0.8.4, `retry-join` accepts a unified interface using the +As of Nomad 0.8.4, +[`retry_join`](/docs/agent/configuration/server_join.html#retry_join) accepts a +unified interface using the [go-discover](https://github.com/hashicorp/go-discover) library for doing 
automatic cluster joining using cloud metadata. To use retry-join with a supported cloud provider, specify the configuration on the command line or diff --git a/website/source/docs/agent/configuration/client.html.md b/website/source/docs/agent/configuration/client.html.md index 7cee5601a59..2ba20be5b3a 100644 --- a/website/source/docs/agent/configuration/client.html.md +++ b/website/source/docs/agent/configuration/client.html.md @@ -90,7 +90,7 @@ client { receive work. This may be specified as an IP address or DNS, with or without the port. If the port is omitted, the default port of `4647` is used. -- `server_join` ([server_join](#server-join): nil) - Specifies +- `server_join` ([server_join][server-join]: nil) - Specifies how the Nomad client will connect to Nomad servers. The `start_join` field is not supported on the client. The retry_join fields may directly specify the server address or use go-discover syntax for auto-discovery. See the diff --git a/website/source/docs/agent/configuration/server.html.md b/website/source/docs/agent/configuration/server.html.md index d1919872d11..6da8ab88e85 100644 --- a/website/source/docs/agent/configuration/server.html.md +++ b/website/source/docs/agent/configuration/server.html.md @@ -135,10 +135,10 @@ server { cluster again when starting. This flag allows the previous state to be used to rejoin the cluster. -- `server_join` ([server_join](#server-join): nil) - Specifies - how the Nomad client will connect to Nomad servers. The retry_join fields may - directly specify the server address or use go-discover syntax for - auto-discovery. See the documentation for more detail. +- `server_join` ([server_join][server-join]: nil) - Specifies + how the Nomad server will connect to other Nomad servers. The `retry_join` + fields may directly specify the server address or use go-discover syntax for + auto-discovery. See the [server_join documentation][server-join] for more detail. 
- `upgrade_version` `(string: "")` - A custom version of the format X.Y.Z to use in place of the Nomad version when custom upgrades are enabled in Autopilot. @@ -153,25 +153,25 @@ server { succeeds. After one succeeds, no further addresses will be contacted. This is useful for cases where we know the address will become available eventually. Use `retry_join` with an array as a replacement for `start_join`, **do not use - both options**. See the [server_join](#server-join) + both options**. See the [server_join][server-join] section for more information on the format of the string. This field is - deprecated in favor of [server_join](#server-join). + deprecated in favor of the [server_join stanza][server-join]. - `retry_interval` `(string: "30s")` - Specifies the time to wait between retry - join attempts. This field is deprecated in favor of - [server_join](#server-join). + join attempts. This field is deprecated in favor of the [server_join + stanza][server-join]. - `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be made before exiting with a return code of 1. By default, this is set to 0 - which is interpreted as infinite retries. This field is deprecated in favor - of [server_join](#server-join). + which is interpreted as infinite retries. This field is deprecated in favor of + the [server_join stanza][server-join]. - `start_join` `(array: [])` - Specifies a list of server addresses to join on startup. If Nomad is unable to join with any of the specified - addresses, agent startup will fail. See the - [server address format](#server-address-format) section for more information - on the format of the string. This field is deprecated in favor of - [server_join](#server-join). + addresses, agent startup will fail. See the [server address + format](/docs/agent/configuration/server_join.html#server-address-format) + section for more information on the format of the string. 
This field is + deprecated in favor of the [server_join stanza][server-join]. ## `server` Examples @@ -219,4 +219,4 @@ server { ``` [encryption]: /docs/agent/encryption.html "Nomad Agent Encryption" -[server-join]: /docs/agent/configuration/server_join.html.md "Server Join" +[server-join]: /docs/agent/configuration/server_join.html "Server Join" diff --git a/website/source/docs/agent/configuration/server_join.html.md b/website/source/docs/agent/configuration/server_join.html.md index 4fa823e2356..82620a781c2 100644 --- a/website/source/docs/agent/configuration/server_join.html.md +++ b/website/source/docs/agent/configuration/server_join.html.md @@ -1,14 +1,26 @@ --- layout: "docs" page_title: "server_join Stanza - Agent Configuration" -sidebar_current: "docs-agent-configuration-server_join" +sidebar_current: "docs-agent-configuration--server-join" description: |- - The server_join stanza specifies how the Nomad agent will discover and connect to Nomad servers. + The "server_join" stanza specifies how the Nomad agent will discover and connect to Nomad servers. --- # `server_join` Stanza -The server_join stanza specifies how the Nomad agent will discover and connect to Nomad servers. + + + + + +
    Placement + server -> **server_join** +
    + client -> **server_join** +
    + +The `server_join` stanza specifies how the Nomad agent will discover and connect +to Nomad servers. ```hcl server_join { @@ -20,46 +32,46 @@ server_join { ## `server_join` Parameters -- `retry_join` `(array: [])` - Specifies a list of server - addresses to retry joining if the first attempt fails. This is similar to - [`start_join`](#start_join), but only invokes if the initial join attempt - fails, and is available to both Nomad servers and clients, while - `start_join` is only defined for Nomad servers. The list of addresses will - be tried in the order specified, until one succeeds. After one succeeds, no - further addresses will be contacted. This is - useful for cases where we know the address will become available eventually. - Use `retry_join` with an array as a replacement for `start_join`, **do not use - both options**. - - Address format includes both using IP addresses as well as an interface to the +- `retry_join` `(array: [])` - Specifies a list of server addresses to + join. This is similar to [`start_join`](#start_join), but will continue to + be attempted even if the initial join attempt fails, up to + [retry_max](#retry_max). Further, `retry_join` is available to + both Nomad servers and clients, while `start_join` is only defined for Nomad + servers. This is useful for cases where we know the address will become + available eventually. Use `retry_join` with an array as a replacement for + `start_join`, **do not use both options**. + + Address format includes both using IP addresses as well as an interface to the [go-discover](https://github.com/hashicorp/go-discover) library for doing - automated cluster joining using cloud metadata. - See Cloud Auto Join`([CloudAutoJoin][cloud_auto_join]: nil). + automated cluster joining using cloud metadata. See [Cloud + Auto-join][cloud_auto_join] for more information. 
- ``` + ``` server_join { retry_join = [ "1.1.1.1", "2.2.2.2" ] } ``` - Using the `go-discover` interface, this can be defined both in a client or + Using the `go-discover` interface, this can be defined both in a client or server configuration as well as provided as a command-line argument. - ``` + + ``` server_join { retry_join = [ "provider=aws tag_key=..." ] } ``` - See the [server address format](#server-address-format) for more information + + See the [server address format](#server-address-format) for more information about expected server address formats. -- `retry_interval` `(string: "30s")` - Specifies the time to wait between retry +- `retry_interval` `(string: "30s")` - Specifies the time to wait between retry join attempts. -- `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be +- `retry_max` `(int: 0)` - Specifies the maximum number of join attempts to be made before exiting with a return code of 1. By default, this is set to 0 which is interpreted as infinite retries. -- `start_join` `(array: [])` - Specifies a list of server addresses to +- `start_join` `(array: [])` - Specifies a list of server addresses to join on startup. If Nomad is unable to join with any of the specified addresses, agent startup will fail. See the [server address format](#server-address-format) section for more information @@ -67,13 +79,13 @@ server_join { will result in a configuration parse error if included in a client configuration. -### Server Address Format +## Server Address Format This section describes the acceptable syntax and format for describing the location of a Nomad server. There are many ways to reference a Nomad server, including directly by IP address and resolving through DNS. -#### Directly via IP Address +### Directly via IP Address It is possible to address another Nomad server using its IP address. 
This is done in the `ip:port` format, such as: @@ -89,7 +101,7 @@ unless configured otherwise: 1.2.3.4 => 1.2.3.4:4648 ``` -#### Via Domains or DNS +### Via Domains or DNS It is possible to address another Nomad server using its DNS address. This is done in the `address:port` format, such as: @@ -105,14 +117,15 @@ unless configured otherwise: nomad-01.company.local => nomad-01.company.local:4648 ``` -#### Via the go-discover interface +### Via the go-discover interface As of Nomad 0.8.4, `retry_join` accepts a unified interface using the [go-discover](https://github.com/hashicorp/go-discover) library for doing -automated cluster joining using cloud metadata. +automated cluster joining using cloud metadata. See [Cloud +Auto-join][cloud_auto_join] for more information. ``` "provider=aws tag_key=..." => 1.2.3.4:4648 - -See ([CloudAutoJoin][cloud_auto_join]: nil) for further information. ``` + +[cloud_auto_join]: /docs/agent/cloud_auto_join.html "Nomad Cloud Auto-join" diff --git a/website/source/docs/commands/agent.html.md.erb b/website/source/docs/commands/agent.html.md.erb index ea3b4f40b64..4afb7717802 100644 --- a/website/source/docs/commands/agent.html.md.erb +++ b/website/source/docs/commands/agent.html.md.erb @@ -71,13 +71,14 @@ via CLI arguments. The `agent` command accepts the following arguments: * `-region=`: Equivalent to the [region](#region) config option. * `-rejoin`: Equivalent to the [rejoin_after_leave](#rejoin_after_leave) config option. * `-retry-interval`: Equivalent to the [retry_interval](#retry_interval) config option. -* `-retry-join`: Similar to `-join` but allows retrying a join if the first attempt fails. +* `-retry-join`: Similar to `-join` but allows retrying a join if the first attempt fails. -Note that `retry_join` can be defined for only servers as a command-line -flag (clients are only able to define via the client configuration). 
-```sh -$ nomad agent -retry-join "127.0.0.1:4648" -` + ```sh + $ nomad agent -retry-join "127.0.0.1:4648" + ``` + + Note that `retry_join` can be set as a command-line flag only for servers + (clients can only define it via the client configuration). * `-retry-max`: Similar to the [retry_max](#retry_max) config option. * `-server`: Enable server mode on the local agent. diff --git a/website/source/layouts/docs.erb b/website/source/layouts/docs.erb index 2fa5684181f..eea874b8f7e 100644 --- a/website/source/layouts/docs.erb +++ b/website/source/layouts/docs.erb @@ -396,6 +396,9 @@ Nomad Agent