From 0881b9420190120d7f659351ff45c9492f5f3be2 Mon Sep 17 00:00:00 2001 From: Matt Mukerjee Date: Wed, 6 Oct 2021 15:48:12 -0700 Subject: [PATCH] Add FailoverHeartbeatTTL to config (#11127) FailoverHeartbeatTTL is the amount of time to wait after a server leader failure before considering reallocating client tasks. This TTL should be fairly long as the new server leader needs to rebuild the entire heartbeat map for the cluster. In deployments with a small number of machines, the default TTL (5m) may be unnecessarily long. Let's allow operators to configure this value in their config files. --- .changelog/11127.txt | 3 +++ command/agent/agent.go | 3 +++ command/agent/agent_test.go | 5 +++++ command/agent/config.go | 12 ++++++++++++ command/agent/config_parse.go | 1 + command/agent/config_parse_test.go | 2 ++ command/agent/testdata/basic.hcl | 1 + command/agent/testdata/basic.json | 1 + website/content/docs/configuration/server.mdx | 10 ++++++++++ 9 files changed, 38 insertions(+) create mode 100644 .changelog/11127.txt diff --git a/.changelog/11127.txt b/.changelog/11127.txt new file mode 100644 index 00000000000..245a4f8ef7e --- /dev/null +++ b/.changelog/11127.txt @@ -0,0 +1,3 @@ +```release-note:improvement +server: Allow tuning of node failover heartbeat TTL +``` diff --git a/command/agent/agent.go b/command/agent/agent.go index a500dea7f43..1943fb68289 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -380,6 +380,9 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 { conf.MaxHeartbeatsPerSecond = maxHPS } + if failoverTTL := agentConfig.Server.FailoverHeartbeatTTL; failoverTTL != 0 { + conf.FailoverHeartbeatTTL = failoverTTL + } if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" { return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled") diff --git a/command/agent/agent_test.go 
b/command/agent/agent_test.go index 39f8380b37e..0182ca26429 100644 --- a/command/agent/agent_test.go +++ b/command/agent/agent_test.go @@ -141,6 +141,11 @@ func TestAgent_ServerConfig(t *testing.T) { require.NoError(t, err) require.Equal(t, float64(11.0), out.MaxHeartbeatsPerSecond) + conf.Server.FailoverHeartbeatTTL = 337 * time.Second + out, err = a.serverConfig() + require.NoError(t, err) + require.Equal(t, 337*time.Second, out.FailoverHeartbeatTTL) + // Defaults to the global bind addr conf.Addresses.RPC = "" conf.Addresses.Serf = "" diff --git a/command/agent/config.go b/command/agent/config.go index 687b3f03748..439fdebfb84 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -441,6 +441,12 @@ type ServerConfig struct { // to meet the target rate. MaxHeartbeatsPerSecond float64 `hcl:"max_heartbeats_per_second"` + // FailoverHeartbeatTTL is the TTL applied to heartbeats after + // a new leader is elected, since we no longer know the status + // of all the heartbeats. + FailoverHeartbeatTTL time.Duration + FailoverHeartbeatTTLHCL string `hcl:"failover_heartbeat_ttl" json:"-"` + // StartJoin is a list of addresses to attempt to join when the // agent starts. If Serf is unable to communicate with any of these // addresses, then the agent will error and exit. 
@@ -1484,6 +1490,12 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { if b.MaxHeartbeatsPerSecond != 0.0 { result.MaxHeartbeatsPerSecond = b.MaxHeartbeatsPerSecond } + if b.FailoverHeartbeatTTL != 0 { + result.FailoverHeartbeatTTL = b.FailoverHeartbeatTTL + } + if b.FailoverHeartbeatTTLHCL != "" { + result.FailoverHeartbeatTTLHCL = b.FailoverHeartbeatTTLHCL + } if b.RetryMaxAttempts != 0 { result.RetryMaxAttempts = b.RetryMaxAttempts } diff --git a/command/agent/config_parse.go b/command/agent/config_parse.go index 319e7ef195c..b745835711d 100644 --- a/command/agent/config_parse.go +++ b/command/agent/config_parse.go @@ -55,6 +55,7 @@ func ParseConfigFile(path string) (*Config, error) { {"client.server_join.retry_interval", &c.Client.ServerJoin.RetryInterval, &c.Client.ServerJoin.RetryIntervalHCL}, {"server.heartbeat_grace", &c.Server.HeartbeatGrace, &c.Server.HeartbeatGraceHCL}, {"server.min_heartbeat_ttl", &c.Server.MinHeartbeatTTL, &c.Server.MinHeartbeatTTLHCL}, + {"server.failover_heartbeat_ttl", &c.Server.FailoverHeartbeatTTL, &c.Server.FailoverHeartbeatTTLHCL}, {"server.retry_interval", &c.Server.RetryInterval, &c.Server.RetryIntervalHCL}, {"server.server_join.retry_interval", &c.Server.ServerJoin.RetryInterval, &c.Server.ServerJoin.RetryIntervalHCL}, {"consul.timeout", &c.Consul.Timeout, &c.Consul.TimeoutHCL}, diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index 9f6d66a6918..5ee1305012f 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -112,6 +112,8 @@ var basicConfig = &Config{ MinHeartbeatTTL: 33 * time.Second, MinHeartbeatTTLHCL: "33s", MaxHeartbeatsPerSecond: 11.0, + FailoverHeartbeatTTL: 330 * time.Second, + FailoverHeartbeatTTLHCL: "330s", RetryJoin: []string{"1.1.1.1", "2.2.2.2"}, StartJoin: []string{"1.1.1.1", "2.2.2.2"}, RetryInterval: 15 * time.Second, diff --git a/command/agent/testdata/basic.hcl b/command/agent/testdata/basic.hcl index 
b4a55197d57..c28cdfd938f 100644 --- a/command/agent/testdata/basic.hcl +++ b/command/agent/testdata/basic.hcl @@ -120,6 +120,7 @@ server { heartbeat_grace = "30s" min_heartbeat_ttl = "33s" max_heartbeats_per_second = 11.0 + failover_heartbeat_ttl = "330s" retry_join = ["1.1.1.1", "2.2.2.2"] start_join = ["1.1.1.1", "2.2.2.2"] retry_max = 3 diff --git a/command/agent/testdata/basic.json b/command/agent/testdata/basic.json index 02de2490c4c..a92d7748d50 100644 --- a/command/agent/testdata/basic.json +++ b/command/agent/testdata/basic.json @@ -273,6 +273,7 @@ "job_gc_threshold": "12h", "max_heartbeats_per_second": 11, "min_heartbeat_ttl": "33s", + "failover_heartbeat_ttl": "330s", "node_gc_threshold": "12h", "non_voting_server": true, "num_schedulers": 2, diff --git a/website/content/docs/configuration/server.mdx b/website/content/docs/configuration/server.mdx index 2b0b702587a..8e6eeaa43c5 100644 --- a/website/content/docs/configuration/server.mdx +++ b/website/content/docs/configuration/server.mdx @@ -131,6 +131,16 @@ server { a tradeoff as it lowers failure detection time of nodes at the tradeoff of false positives and increased load on the leader. +- `failover_heartbeat_ttl` `(string: "5m")` - Specifies the TTL applied to + heartbeats after a new leader is elected, since we no longer know the status + of all the heartbeats. This is specified using a label suffix like "30s" or + "1h". + + ~> Lowering the `failover_heartbeat_ttl` is a tradeoff as it lowers failure + detection time of nodes at the tradeoff of false positives. False positives + could cause all clients to stop their allocations if a leadership transition + lasts longer than `heartbeat_grace + failover_heartbeat_ttl`. + - `max_heartbeats_per_second` `(float: 50.0)` - Specifies the maximum target rate of heartbeats being processed per second. This allows the TTL to be increased to meet the target rate. Increasing the maximum heartbeats per