From a9cf2638886280a352e673b54d76c6433e863494 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Sat, 30 May 2020 09:27:33 -0400
Subject: [PATCH 1/3] implement raft multiplier

---
 command/agent/agent.go             |  20 ++++++
 command/agent/agent_test.go        | 108 +++++++++++++++++++++++++++++
 command/agent/config.go            |   7 ++
 command/agent/config_parse_test.go |   1 +
 command/agent/config_test.go       |   6 +-
 command/agent/testdata/basic.hcl   |   1 +
 command/agent/testdata/basic.json  |   1 +
 7 files changed, 141 insertions(+), 3 deletions(-)

diff --git a/command/agent/agent.go b/command/agent/agent.go
index 97434a701da..3949afc7278 100644
--- a/command/agent/agent.go
+++ b/command/agent/agent.go
@@ -43,6 +43,14 @@ const (
 	// roles used in identifying Consul entries for Nomad agents
 	consulRoleServer = "server"
 	consulRoleClient = "client"
+
+	// DefaultRaftMultiplier is used as a baseline Raft configuration that
+	// will be reliable on a very basic server.
+	DefaultRaftMultiplier = 1
+
+	// MaxRaftMultiplier is a fairly arbitrary upper bound that limits the
+	// amount of performance detuning that's possible.
+	MaxRaftMultiplier = 10
 )
 
 // Agent is a long running daemon that is used to run both
@@ -180,6 +188,18 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) {
 	if agentConfig.Server.RaftProtocol != 0 {
 		conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol)
 	}
+	raftMultiplier := int(DefaultRaftMultiplier)
+	if agentConfig.Server.RaftMultiplier != nil && *agentConfig.Server.RaftMultiplier != 0 {
+		raftMultiplier = *agentConfig.Server.RaftMultiplier
+		if raftMultiplier < 1 || raftMultiplier > MaxRaftMultiplier {
+			return nil, fmt.Errorf("raft_multiplier cannot be %d. Must be between 1 and %d", *agentConfig.Server.RaftMultiplier, MaxRaftMultiplier)
+		}
+	}
+	conf.RaftConfig.ElectionTimeout *= time.Duration(raftMultiplier)
+	conf.RaftConfig.HeartbeatTimeout *= time.Duration(raftMultiplier)
+	conf.RaftConfig.LeaderLeaseTimeout *= time.Duration(raftMultiplier)
+	conf.RaftConfig.CommitTimeout *= time.Duration(raftMultiplier)
+
 	if agentConfig.Server.NumSchedulers != nil {
 		conf.NumSchedulers = *agentConfig.Server.NumSchedulers
 	}
diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go
index da278533b5e..fb09926f394 100644
--- a/command/agent/agent_test.go
+++ b/command/agent/agent_test.go
@@ -1,6 +1,7 @@
 package agent
 
 import (
+	"fmt"
 	"io/ioutil"
 	"os"
 	"path/filepath"
@@ -362,6 +363,113 @@ func TestAgent_ServerConfig_Limits_OK(t *testing.T) {
 	}
 }
 
+func TestAgent_ServerConfig_RaftMultiplier_Ok(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		multiplier         *int
+		electionTimeout    time.Duration
+		heartbeatTimeout   time.Duration
+		leaderLeaseTimeout time.Duration
+		commitTimeout      time.Duration
+	}{
+		// nil and 0 fall back to the defaults of the Raft library.
+		// Expected values are hardcoded to detect changes from raft.
+		{
+			multiplier: nil,
+
+			electionTimeout:    1 * time.Second,
+			heartbeatTimeout:   1 * time.Second,
+			leaderLeaseTimeout: 500 * time.Millisecond,
+			commitTimeout:      50 * time.Millisecond,
+		},
+
+		{
+			multiplier: helper.IntToPtr(0),
+
+			electionTimeout:    1 * time.Second,
+			heartbeatTimeout:   1 * time.Second,
+			leaderLeaseTimeout: 500 * time.Millisecond,
+			commitTimeout:      50 * time.Millisecond,
+		},
+		{
+			multiplier: helper.IntToPtr(1),
+
+			electionTimeout:    1 * time.Second,
+			heartbeatTimeout:   1 * time.Second,
+			leaderLeaseTimeout: 500 * time.Millisecond,
+			commitTimeout:      50 * time.Millisecond,
+		},
+		{
+			multiplier: helper.IntToPtr(5),
+
+			electionTimeout:    5 * time.Second,
+			heartbeatTimeout:   5 * time.Second,
+			leaderLeaseTimeout: 2500 * time.Millisecond,
+			commitTimeout:      250 * time.Millisecond,
+		},
+		{
+			multiplier: helper.IntToPtr(6),
+
+			electionTimeout:    6 * time.Second,
+			heartbeatTimeout:   6 * time.Second,
+			leaderLeaseTimeout: 3000 * time.Millisecond,
+			commitTimeout:      300 * time.Millisecond,
+		},
+		{
+			multiplier: helper.IntToPtr(10),
+
+			electionTimeout:    10 * time.Second,
+			heartbeatTimeout:   10 * time.Second,
+			leaderLeaseTimeout: 5000 * time.Millisecond,
+			commitTimeout:      500 * time.Millisecond,
+		},
+	}
+
+	for _, tc := range cases {
+		v := "default"
+		if tc.multiplier != nil {
+			v = fmt.Sprintf("%v", *tc.multiplier)
+		}
+		t.Run(v, func(t *testing.T) {
+			conf := DevConfig(nil)
+			require.NoError(t, conf.normalizeAddrs())
+
+			conf.Server.RaftMultiplier = tc.multiplier
+
+			serverConf, err := convertServerConfig(conf)
+			require.NoError(t, err)
+
+			assert.Equal(t, tc.electionTimeout, serverConf.RaftConfig.ElectionTimeout, "election timeout")
+			assert.Equal(t, tc.heartbeatTimeout, serverConf.RaftConfig.HeartbeatTimeout, "heartbeat timeout")
+			assert.Equal(t, tc.leaderLeaseTimeout, serverConf.RaftConfig.LeaderLeaseTimeout, "leader lease timeout")
+			assert.Equal(t, tc.commitTimeout, serverConf.RaftConfig.CommitTimeout, "commit timeout")
+		})
+	}
+}
+
+func TestAgent_ServerConfig_RaftMultiplier_Bad(t *testing.T) {
+	t.Parallel()
+
+	cases := []int{
+		-1,
+		100,
+	}
+
+	for _, tc := range cases {
+		t.Run(fmt.Sprintf("%v", tc), func(t *testing.T) {
+			conf := DevConfig(nil)
+			require.NoError(t, conf.normalizeAddrs())
+
+			conf.Server.RaftMultiplier = &tc
+
+			_, err := convertServerConfig(conf)
+			require.Error(t, err)
+			require.Contains(t, err.Error(), "raft_multiplier cannot be")
+		})
+	}
+}
+
 func TestAgent_ClientConfig(t *testing.T) {
 	t.Parallel()
 	conf := DefaultConfig()
diff --git a/command/agent/config.go b/command/agent/config.go
index 8e7c658ba73..4d08f64b74a 100644
--- a/command/agent/config.go
+++ b/command/agent/config.go
@@ -355,6 +355,9 @@ type ServerConfig struct {
 	// RaftProtocol is the Raft protocol version to speak. This must be from [1-3].
 	RaftProtocol int `hcl:"raft_protocol"`
 
+	// RaftMultiplier scales the Raft timing parameters
+	RaftMultiplier *int `hcl:"raft_multiplier"`
+
 	// NumSchedulers is the number of scheduler threads that are run.
 	// This can be as many as one per core, or zero to disable this server
 	// from doing any scheduling work.
@@ -1315,6 +1318,10 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig {
 	if b.RaftProtocol != 0 {
 		result.RaftProtocol = b.RaftProtocol
 	}
+	if b.RaftMultiplier != nil {
+		c := *b.RaftMultiplier
+		result.RaftMultiplier = &c
+	}
 	if b.NumSchedulers != nil {
 		result.NumSchedulers = helper.IntToPtr(*b.NumSchedulers)
 	}
diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go
index b9a06d8c90e..f0d09b5f15d 100644
--- a/command/agent/config_parse_test.go
+++ b/command/agent/config_parse_test.go
@@ -97,6 +97,7 @@ var basicConfig = &Config{
 		DataDir:           "/tmp/data",
 		ProtocolVersion:   3,
 		RaftProtocol:      3,
+		RaftMultiplier:    helper.IntToPtr(4),
 		NumSchedulers:     helper.IntToPtr(2),
 		EnabledSchedulers: []string{"test"},
 		NodeGCThreshold:   "12h",
diff --git a/command/agent/config_test.go b/command/agent/config_test.go
index ce4fb96a03e..7c14432c7ef 100644
--- a/command/agent/config_test.go
+++ b/command/agent/config_test.go
@@ -132,6 +132,7 @@ func TestConfig_Merge(t *testing.T) {
 			DataDir:         "/tmp/data1",
 			ProtocolVersion: 1,
 			RaftProtocol:    1,
+			RaftMultiplier:  helper.IntToPtr(5),
 			NumSchedulers:   helper.IntToPtr(1),
 			NodeGCThreshold: "1h",
 			HeartbeatGrace:  30 * time.Second,
@@ -317,6 +318,7 @@ func TestConfig_Merge(t *testing.T) {
 			DataDir:           "/tmp/data2",
 			ProtocolVersion:   2,
 			RaftProtocol:      2,
+			RaftMultiplier:    helper.IntToPtr(6),
 			NumSchedulers:     helper.IntToPtr(2),
 			EnabledSchedulers: []string{structs.JobTypeBatch},
 			NodeGCThreshold:   "12h",
@@ -425,9 +427,7 @@ func TestConfig_Merge(t *testing.T) {
 	result := c0.Merge(c1)
 	result = result.Merge(c2)
 	result = result.Merge(c3)
-	if !reflect.DeepEqual(result, c3) {
-		t.Fatalf("bad:\n%#v\n%#v", result, c3)
-	}
+	require.Equal(t, c3, result)
 }
 
 func TestConfig_ParseConfigFile(t *testing.T) {
diff --git a/command/agent/testdata/basic.hcl b/command/agent/testdata/basic.hcl
index cad07e2cc7d..4e2036b4f02 100644
--- a/command/agent/testdata/basic.hcl
+++ b/command/agent/testdata/basic.hcl
@@ -129,6 +129,7 @@ server {
   redundancy_zone  = "foo"
   upgrade_version  = "0.8.0"
   encrypt          = "abc"
+  raft_multiplier  = 4
 
   server_join {
     retry_join = ["1.1.1.1", "2.2.2.2"]
diff --git a/command/agent/testdata/basic.json b/command/agent/testdata/basic.json
index 2a5edafb018..b202eb07935 100644
--- a/command/agent/testdata/basic.json
+++ b/command/agent/testdata/basic.json
@@ -276,6 +276,7 @@
       "num_schedulers": 2,
       "protocol_version": 3,
       "raft_protocol": 3,
+      "raft_multiplier": 4,
       "redundancy_zone": "foo",
       "rejoin_after_leave": true,
       "retry_interval": "15s",
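Before the documentation patches below, it may help to see the scaling behavior from PATCH 1 in isolation. The following is a minimal, self-contained sketch, not Nomad's actual code: the `scaleRaftTimings` helper and the `main` driver are hypothetical illustrations written against the real `hashicorp/raft` `DefaultConfig()`, with the literals 1 and 10 standing in for `DefaultRaftMultiplier` and `MaxRaftMultiplier`.

```go
package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/raft"
)

// scaleRaftTimings mirrors the logic PATCH 1 adds to convertServerConfig:
// validate the multiplier, then scale the four Raft timing parameters by it.
// A nil or zero multiplier keeps the library defaults.
func scaleRaftTimings(conf *raft.Config, multiplier *int) error {
	m := 1 // DefaultRaftMultiplier
	if multiplier != nil && *multiplier != 0 {
		m = *multiplier
		if m < 1 || m > 10 { // 10 == MaxRaftMultiplier
			return fmt.Errorf("raft_multiplier cannot be %d. Must be between 1 and %d", m, 10)
		}
	}
	conf.ElectionTimeout *= time.Duration(m)
	conf.HeartbeatTimeout *= time.Duration(m)
	conf.LeaderLeaseTimeout *= time.Duration(m)
	conf.CommitTimeout *= time.Duration(m)
	return nil
}

func main() {
	conf := raft.DefaultConfig() // 1s election, 1s heartbeat, 500ms lease, 50ms commit
	five := 5
	if err := scaleRaftTimings(conf, &five); err != nil {
		panic(err)
	}
	fmt.Println(conf.ElectionTimeout, conf.HeartbeatTimeout,
		conf.LeaderLeaseTimeout, conf.CommitTimeout) // 5s 5s 2.5s 250ms
}
```

The printed values for a multiplier of 5 match the expectations hardcoded in `TestAgent_ServerConfig_RaftMultiplier_Ok` above.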
From 38f6be0b8d721aada868a25c8df4c494c883bd11 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Sun, 31 May 2020 12:24:44 -0400
Subject: [PATCH 2/3] document raft_multiplier

---
 website/pages/docs/configuration/server.mdx | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/website/pages/docs/configuration/server.mdx b/website/pages/docs/configuration/server.mdx
index aafc402751d..dfa06d8ab51 100644
--- a/website/pages/docs/configuration/server.mdx
+++ b/website/pages/docs/configuration/server.mdx
@@ -141,6 +141,14 @@ server {
   features and is typically not required as the agent internally knows the
   latest version, but may be useful in some upgrade scenarios.
 
+- `raft_multiplier` `(int: 1)` - An integer multiplier used by Nomad servers
+  to scale key Raft timing parameters. Omitting this value or setting it to 0
+  uses the default timing described below. Lower values tighten the timing
+  and increase sensitivity, while higher values relax timings and reduce
+  sensitivity. Tuning this affects the time it takes Nomad to detect leader
+  failures and to perform leader elections, at the expense of requiring more
+  network and CPU resources. The maximum allowed value is 10.
+
 - `redundancy_zone` `(string: "")` - (Enterprise-only) Specifies the redundancy
   zone that this server will be a part of for Autopilot management. For more
   information, see the [Autopilot Guide](https://learn.hashicorp.com/nomad/operating-nomad/autopilot).
@@ -275,4 +283,4 @@ server {
 [encryption]: https://learn.hashicorp.com/nomad/transport-security/gossip-encryption 'Nomad Encryption Overview'
 [server-join]: /docs/configuration/server_join 'Server Join'
 [update-scheduler-config]: /api-docs/operator#update-scheduler-configuration 'Scheduler Config'
-[bootstrapping a cluster]: /docs/faq#bootstrapping
\ No newline at end of file
+[bootstrapping a cluster]: /docs/faq#bootstrapping

From 744539cdb11c73ed990bd6b631ec884595963593 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Thu, 18 Jun 2020 08:04:22 -0400
Subject: [PATCH 3/3] docs: elaborate on raft_multiplier default

---
 website/pages/docs/configuration/server.mdx | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/website/pages/docs/configuration/server.mdx b/website/pages/docs/configuration/server.mdx
index dfa06d8ab51..b298afcce48 100644
--- a/website/pages/docs/configuration/server.mdx
+++ b/website/pages/docs/configuration/server.mdx
@@ -149,6 +149,17 @@ server {
   failures and to perform leader elections, at the expense of requiring more
   network and CPU resources. The maximum allowed value is 10.
 
+  By default, Nomad uses the highest-performance timing, currently equivalent
+  to setting this value to 1. Increasing the timings makes leader elections
+  less likely during periods of networking issues or resource starvation.
+  Since leader elections pause Nomad's normal work, waiting longer before
+  electing a new leader may be beneficial on slow or unreliable networks.
+  The tradeoff when raising this value is that when a leader is lost, such as
+  during a network partition or a server crash, Nomad will take longer than
+  the default to elect a new leader. The [`nomad.nomad.leader.barrier` and
+  `nomad.raft.leader.lastContact` metrics](/docs/telemetry/metrics) are good
+  indicators of how often leader elections occur and of Raft latency.
+
 - `redundancy_zone` `(string: "")` - (Enterprise-only) Specifies the redundancy
   zone that this server will be a part of for Autopilot management. For more
   information, see the [Autopilot Guide](https://learn.hashicorp.com/nomad/operating-nomad/autopilot).
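Taken together, the series lets operators relax Raft timings with a single setting. As a usage sketch (the values below are illustrative, not a recommendation), an agent configuration file might contain:

```hcl
server {
  enabled          = true
  bootstrap_expect = 3

  # Scale the four Raft timing parameters 5x. Against the library defaults
  # this yields a 5s election timeout, 5s heartbeat timeout, 2.5s leader
  # lease, and a 250ms commit timeout. Valid values are 1-10; omitting the
  # setting or using 0 keeps the defaults.
  raft_multiplier = 5
}
```

Per the Merge logic in PATCH 1, a `raft_multiplier` set in a later configuration file overrides one set earlier, and a value outside 1-10 makes the agent fail at startup with the "raft_multiplier cannot be ..." error exercised in `TestAgent_ServerConfig_RaftMultiplier_Bad`.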