From 47c654016dd192dafe151d4fffb87b0c16343e33 Mon Sep 17 00:00:00 2001 From: Matt Mukerjee Date: Sat, 28 Aug 2021 21:30:27 +0000 Subject: [PATCH 1/3] Add FailoverHeartbeatTTL to config FailoverHeartbeatTTL is the amount of time to wait after a server leader failure before considering reallocating client tasks. This TTL should be fairly long as the new server leader needs to rebuild the entire heartbeat map for the cluster. In deployments with a small number of machines, the default TTL (5m) may be unnecessarily long. Let's allow operators to configure this value in their config files. --- command/agent/agent.go | 3 +++ command/agent/agent_test.go | 5 +++++ command/agent/config.go | 12 ++++++++++++ command/agent/config_parse.go | 1 + command/agent/testdata/basic.hcl | 1 + command/agent/testdata/basic.json | 1 + website/content/docs/configuration/server.mdx | 6 ++++++ 7 files changed, 29 insertions(+) diff --git a/command/agent/agent.go b/command/agent/agent.go index a500dea7f43..1943fb68289 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -380,6 +380,9 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 { conf.MaxHeartbeatsPerSecond = maxHPS } + if failoverTTL := agentConfig.Server.FailoverHeartbeatTTL; failoverTTL != 0 { + conf.FailoverHeartbeatTTL = failoverTTL + } if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" { return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled") diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go index 39f8380b37e..0182ca26429 100644 --- a/command/agent/agent_test.go +++ b/command/agent/agent_test.go @@ -141,6 +141,11 @@ func TestAgent_ServerConfig(t *testing.T) { require.NoError(t, err) require.Equal(t, float64(11.0), out.MaxHeartbeatsPerSecond) + conf.Server.FailoverHeartbeatTTL = 337 * time.Second + out, err = a.serverConfig() + require.NoError(t, 
err) + require.Equal(t, 337*time.Second, out.FailoverHeartbeatTTL) + // Defaults to the global bind addr conf.Addresses.RPC = "" conf.Addresses.Serf = "" diff --git a/command/agent/config.go b/command/agent/config.go index 687b3f03748..439fdebfb84 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -441,6 +441,12 @@ type ServerConfig struct { // to meet the target rate. MaxHeartbeatsPerSecond float64 `hcl:"max_heartbeats_per_second"` + // FailoverHeartbeatTTL is the TTL applied to heartbeats after + // a new leader is elected, since we no longer know the status + // of all the heartbeats. + FailoverHeartbeatTTL time.Duration + FailoverHeartbeatTTLHCL string `hcl:"failover_heartbeat_ttl" json:"-"` + // StartJoin is a list of addresses to attempt to join when the // agent starts. If Serf is unable to communicate with any of these // addresses, then the agent will error and exit. @@ -1484,6 +1490,12 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { if b.MaxHeartbeatsPerSecond != 0.0 { result.MaxHeartbeatsPerSecond = b.MaxHeartbeatsPerSecond } + if b.FailoverHeartbeatTTL != 0 { + result.FailoverHeartbeatTTL = b.FailoverHeartbeatTTL + } + if b.FailoverHeartbeatTTLHCL != "" { + result.FailoverHeartbeatTTLHCL = b.FailoverHeartbeatTTLHCL + } if b.RetryMaxAttempts != 0 { result.RetryMaxAttempts = b.RetryMaxAttempts } diff --git a/command/agent/config_parse.go b/command/agent/config_parse.go index 319e7ef195c..b745835711d 100644 --- a/command/agent/config_parse.go +++ b/command/agent/config_parse.go @@ -55,6 +55,7 @@ func ParseConfigFile(path string) (*Config, error) { {"client.server_join.retry_interval", &c.Client.ServerJoin.RetryInterval, &c.Client.ServerJoin.RetryIntervalHCL}, {"server.heartbeat_grace", &c.Server.HeartbeatGrace, &c.Server.HeartbeatGraceHCL}, {"server.min_heartbeat_ttl", &c.Server.MinHeartbeatTTL, &c.Server.MinHeartbeatTTLHCL}, + {"server.failover_heartbeat_ttl", &c.Server.FailoverHeartbeatTTL, 
&c.Server.FailoverHeartbeatTTLHCL}, {"server.retry_interval", &c.Server.RetryInterval, &c.Server.RetryIntervalHCL}, {"server.server_join.retry_interval", &c.Server.ServerJoin.RetryInterval, &c.Server.ServerJoin.RetryIntervalHCL}, {"consul.timeout", &c.Consul.Timeout, &c.Consul.TimeoutHCL}, diff --git a/command/agent/testdata/basic.hcl b/command/agent/testdata/basic.hcl index b4a55197d57..c28cdfd938f 100644 --- a/command/agent/testdata/basic.hcl +++ b/command/agent/testdata/basic.hcl @@ -120,6 +120,7 @@ server { heartbeat_grace = "30s" min_heartbeat_ttl = "33s" max_heartbeats_per_second = 11.0 + failover_heartbeat_ttl = "330s" retry_join = ["1.1.1.1", "2.2.2.2"] start_join = ["1.1.1.1", "2.2.2.2"] retry_max = 3 diff --git a/command/agent/testdata/basic.json b/command/agent/testdata/basic.json index 02de2490c4c..a92d7748d50 100644 --- a/command/agent/testdata/basic.json +++ b/command/agent/testdata/basic.json @@ -273,6 +273,7 @@ "job_gc_threshold": "12h", "max_heartbeats_per_second": 11, "min_heartbeat_ttl": "33s", + "failover_heartbeat_ttl": "330s", "node_gc_threshold": "12h", "non_voting_server": true, "num_schedulers": 2, diff --git a/website/content/docs/configuration/server.mdx b/website/content/docs/configuration/server.mdx index aa9763f774f..fefe787ac61 100644 --- a/website/content/docs/configuration/server.mdx +++ b/website/content/docs/configuration/server.mdx @@ -137,6 +137,12 @@ server { second is a tradeoff as it lowers failure detection time of nodes at the tradeoff of false positives and increased load on the leader. +- `failover_heartbeat_ttl` `(string: "5m")` - Specifies the TTL applied to + heartbeats after a new leader is elected, since we no longer know the status + of all the heartbeats. This is specified using a label suffix like "30s" or + "1h". Lowering the failover TTL is a tradeoff as it lowers failure detection + time of nodes at the tradeoff of false positives. 
+ - `non_voting_server` `(bool: false)` - (Enterprise-only) Specifies whether this server will act as a non-voting member of the cluster to help provide read scalability. From 69a2bc767736f18714b29ec3c67c2c45bf87abac Mon Sep 17 00:00:00 2001 From: Matt Mukerjee Date: Sat, 4 Sep 2021 17:24:08 +0000 Subject: [PATCH 2/3] fixup! Add FailoverHeartbeatTTL to config --- command/agent/config_parse_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index 9f6d66a6918..5ee1305012f 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -112,6 +112,8 @@ var basicConfig = &Config{ MinHeartbeatTTL: 33 * time.Second, MinHeartbeatTTLHCL: "33s", MaxHeartbeatsPerSecond: 11.0, + FailoverHeartbeatTTL: 330 * time.Second, + FailoverHeartbeatTTLHCL: "330s", RetryJoin: []string{"1.1.1.1", "2.2.2.2"}, StartJoin: []string{"1.1.1.1", "2.2.2.2"}, RetryInterval: 15 * time.Second, From a4c6822250733b5b863c5574690ed881e78c9022 Mon Sep 17 00:00:00 2001 From: Luiz Aoqui Date: Wed, 6 Oct 2021 17:27:59 -0400 Subject: [PATCH 3/3] docs: add changelog entry for #11127 and highlight impact of failover_heartbeat_ttl --- .changelog/11127.txt | 3 +++ website/content/docs/configuration/server.mdx | 16 ++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) create mode 100644 .changelog/11127.txt diff --git a/.changelog/11127.txt b/.changelog/11127.txt new file mode 100644 index 00000000000..245a4f8ef7e --- /dev/null +++ b/.changelog/11127.txt @@ -0,0 +1,3 @@ +```release-note:improvement +server: Allow tuning of node failover heartbeat TTL +``` diff --git a/website/content/docs/configuration/server.mdx b/website/content/docs/configuration/server.mdx index fefe787ac61..b8b4647d2d0 100644 --- a/website/content/docs/configuration/server.mdx +++ b/website/content/docs/configuration/server.mdx @@ -131,18 +131,22 @@ server { a tradeoff as it lowers failure detection time of nodes at the 
tradeoff of false positives and increased load on the leader. +- `failover_heartbeat_ttl` `(string: "5m")` - Specifies the TTL applied to + heartbeats after a new leader is elected, since we no longer know the status + of all the heartbeats. This is specified using a label suffix like "30s" or + "1h". + + ~> Lowering the `failover_heartbeat_ttl` is a tradeoff as it lowers failure + detection time of nodes at the tradeoff of false positives. False positives + could cause all clients to stop their allocations if a leadership transition + lasts longer than `heartbeat_grace + failover_heartbeat_ttl`. + - `max_heartbeats_per_second` `(float: 50.0)` - Specifies the maximum target rate of heartbeats being processed per second. This allows the TTL to be increased to meet the target rate. Increasing the maximum heartbeats per second is a tradeoff as it lowers failure detection time of nodes at the tradeoff of false positives and increased load on the leader. -- `failover_heartbeat_ttl` `(string: "5m")` - Specifies the TTL applied to - heartbeats after a new leader is elected, since we no longer know the status - of all the heartbeats. This is specified using a label suffix like "30s" or - "1h". Lowering the failover TTL is a tradeoff as it lowers failure detection - time of nodes at the tradeoff of false positives. - - `non_voting_server` `(bool: false)` - (Enterprise-only) Specifies whether this server will act as a non-voting member of the cluster to help provide read scalability.