From 744c9a485dcf9abffdda182d426072a346008e6c Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Sun, 12 Jan 2020 15:50:18 -0500 Subject: [PATCH 1/2] scheduler: allow configuring default preemption for system scheduler Some operators want a greater control over when preemption is enabled, especially during an upgrade to limit potential side-effects. --- command/agent/agent.go | 7 +++++++ command/agent/agent_test.go | 29 +++++++++++++++++++++++++++++ command/agent/config.go | 3 +++ nomad/config.go | 8 ++++++-- nomad/leader.go | 16 +++++++++------- 5 files changed, 54 insertions(+), 9 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index f8d85f5623a..4d2f7fb903e 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -322,6 +322,13 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled") } + // handle system scheduler preemption default + if agentConfig.Server.SystemSchedulerPreemptionEnabledDefault != nil { + conf.SystemSchedulerPreemptionEnabledDefault = *agentConfig.Server.SystemSchedulerPreemptionEnabledDefault + } else { + conf.SystemSchedulerPreemptionEnabledDefault = true + } + // Add the Consul and Vault configs conf.ConsulConfig = agentConfig.Consul conf.VaultConfig = agentConfig.Vault diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go index 0cdffa24a0f..ffabf8bd008 100644 --- a/command/agent/agent_test.go +++ b/command/agent/agent_test.go @@ -1,6 +1,7 @@ package agent import ( + "fmt" "io/ioutil" "os" "path/filepath" @@ -274,6 +275,34 @@ func TestAgent_ServerConfig(t *testing.T) { } } +func TestAgent_ServerConfig_SchedulerFlags(t *testing.T) { + cases := []struct { + input *bool + expected bool + }{ + {nil, true}, + {helper.BoolToPtr(false), false}, + {helper.BoolToPtr(true), true}, + } + + for _, c := range cases { + t.Run(fmt.Sprintf("case: %v", c.input), func(t *testing.T) { + conf := DefaultConfig() + conf.Server.SystemSchedulerPreemptionEnabledDefault = c.input + + a := &Agent{config: conf} + conf.AdvertiseAddrs.Serf = "127.0.0.1:4000" + conf.AdvertiseAddrs.RPC = "127.0.0.1:4001" + conf.AdvertiseAddrs.HTTP = "10.10.11.1:4005" + conf.ACL.Enabled = true + require.NoError(t, conf.normalizeAddrs()) + + out, err := a.serverConfig() + require.NoError(t, err) + require.Equal(t, c.expected, out.SystemSchedulerPreemptionEnabledDefault) + }) + } +} func TestAgent_ClientConfig(t *testing.T) { t.Parallel() conf := DefaultConfig() diff --git a/command/agent/config.go b/command/agent/config.go index 39259abb2c4..c3b8056197a 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -444,6 +444,9 @@ type ServerConfig struct { // ServerJoin contains information that is used to attempt to join servers ServerJoin *ServerJoin `hcl:"server_join"` + // SystemSchedulerPreemptionEnabledDefault is used to determin whether to enable system preemption by default in a new cluster + SystemSchedulerPreemptionEnabledDefault *bool `hcl:"system_scheduler_preemption_enabled_default"` + // ExtraKeysHCL is used by hcl to surface unexpected keys ExtraKeysHCL []string `hcl:",unusedKeys" json:"-"` } diff --git a/nomad/config.go b/nomad/config.go index 4b441f135a3..67e02a19c33 100644 --- a/nomad/config.go +++ b/nomad/config.go @@ -305,6 +305,9 @@ type Config struct { // dead servers. AutopilotInterval time.Duration + // SystemSchedulerPreemptionEnabledDefault is used to determin whether to enable system preemption by default in a new cluster + SystemSchedulerPreemptionEnabledDefault bool + // PluginLoader is used to load plugins. PluginLoader loader.PluginCatalog @@ -377,8 +380,9 @@ func DefaultConfig() *Config { MaxTrailingLogs: 250, ServerStabilizationTime: 10 * time.Second, }, - ServerHealthInterval: 2 * time.Second, - AutopilotInterval: 10 * time.Second, + ServerHealthInterval: 2 * time.Second, + AutopilotInterval: 10 * time.Second, + SystemSchedulerPreemptionEnabledDefault: true, } // Enable all known schedulers by default diff --git a/nomad/leader.go b/nomad/leader.go index ec406a4b4b1..268b3a6ee95 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -45,12 +45,14 @@ var minAutopilotVersion = version.Must(version.NewVersion("0.8.0")) var minSchedulerConfigVersion = version.Must(version.NewVersion("0.9.0")) // Default configuration for scheduler with preemption enabled for system jobs -var defaultSchedulerConfig = &structs.SchedulerConfiguration{ - PreemptionConfig: structs.PreemptionConfig{ - SystemSchedulerEnabled: true, - BatchSchedulerEnabled: false, - ServiceSchedulerEnabled: false, - }, +func (s *Server) defaultSchedulerConfig() structs.SchedulerConfiguration { + return structs.SchedulerConfiguration{ + PreemptionConfig: structs.PreemptionConfig{ + SystemSchedulerEnabled: s.config.SystemSchedulerPreemptionEnabledDefault, + BatchSchedulerEnabled: false, + ServiceSchedulerEnabled: false, + }, + } } // monitorLeadership is used to monitor if we acquire or lose our role @@ -1319,7 +1321,7 @@ func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration { return nil } - req := structs.SchedulerSetConfigRequest{Config: *defaultSchedulerConfig} + req := structs.SchedulerSetConfigRequest{Config: s.defaultSchedulerConfig()} if _, _, err = s.raftApply(structs.SchedulerConfigRequestType, req); err != nil { s.logger.Named("core").Error("failed to initialize config", "error", err) return nil From 31025d6cacc3e620751d9037dee17e464cc373b1 Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Tue, 28 Jan 2020 11:09:36 -0500 Subject: [PATCH 2/2] Support customizing full scheduler config --- command/agent/agent.go | 6 ++--- command/agent/agent_test.go | 50 ++++++++++++++++++++++++++++++------- command/agent/config.go | 11 ++++++-- nomad/config.go | 18 +++++++++---- nomad/leader.go | 13 +--------- nomad/structs/operator.go | 8 +++--- 6 files changed, 70 insertions(+), 36 deletions(-) diff --git a/command/agent/agent.go b/command/agent/agent.go index 4d2f7fb903e..205d100c5b7 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -323,10 +323,8 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { } // handle system scheduler preemption default - if agentConfig.Server.SystemSchedulerPreemptionEnabledDefault != nil { - conf.SystemSchedulerPreemptionEnabledDefault = *agentConfig.Server.SystemSchedulerPreemptionEnabledDefault - } else { - conf.SystemSchedulerPreemptionEnabledDefault = true + if agentConfig.Server.DefaultSchedulerConfig != nil { + conf.DefaultSchedulerConfig = *agentConfig.Server.DefaultSchedulerConfig } // Add the Consul and Vault configs diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go index ffabf8bd008..8472782ccd5 100644 --- a/command/agent/agent_test.go +++ b/command/agent/agent_test.go @@ -1,7 +1,6 @@ package agent import ( - "fmt" "io/ioutil" "os" "path/filepath" @@ -277,18 +276,51 @@ func TestAgent_ServerConfig(t *testing.T) { func TestAgent_ServerConfig_SchedulerFlags(t *testing.T) { cases := []struct { - input *bool - expected bool + name string + input *structs.SchedulerConfiguration + expected structs.SchedulerConfiguration }{ - {nil, true}, - {helper.BoolToPtr(false), false}, - {helper.BoolToPtr(true), true}, + { + "default case", + nil, + structs.SchedulerConfiguration{ + PreemptionConfig: structs.PreemptionConfig{ + SystemSchedulerEnabled: true, + }, + }, + }, + { + "empty value: preemption is disabled", + &structs.SchedulerConfiguration{}, + structs.SchedulerConfiguration{ + PreemptionConfig: structs.PreemptionConfig{ + SystemSchedulerEnabled: false, + }, + }, + }, + { + "all explicitly set", + &structs.SchedulerConfiguration{ + PreemptionConfig: structs.PreemptionConfig{ + SystemSchedulerEnabled: true, + BatchSchedulerEnabled: true, + ServiceSchedulerEnabled: true, + }, + }, + structs.SchedulerConfiguration{ + PreemptionConfig: structs.PreemptionConfig{ + SystemSchedulerEnabled: true, + BatchSchedulerEnabled: true, + ServiceSchedulerEnabled: true, + }, + }, + }, } for _, c := range cases { - t.Run(fmt.Sprintf("case: %v", c.input), func(t *testing.T) { + t.Run(c.name, func(t *testing.T) { conf := DefaultConfig() - conf.Server.SystemSchedulerPreemptionEnabledDefault = c.input + conf.Server.DefaultSchedulerConfig = c.input a := &Agent{config: conf} conf.AdvertiseAddrs.Serf = "127.0.0.1:4000" @@ -299,7 +331,7 @@ func TestAgent_ServerConfig_SchedulerFlags(t *testing.T) { out, err := a.serverConfig() require.NoError(t, err) - require.Equal(t, c.expected, out.SystemSchedulerPreemptionEnabledDefault) + require.Equal(t, c.expected, out.DefaultSchedulerConfig) }) } } diff --git a/command/agent/config.go b/command/agent/config.go index c3b8056197a..07e59703b0f 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -444,8 +444,10 @@ type ServerConfig struct { // ServerJoin contains information that is used to attempt to join servers ServerJoin *ServerJoin `hcl:"server_join"` - // SystemSchedulerPreemptionEnabledDefault is used to determin whether to enable system preemption by default in a new cluster - SystemSchedulerPreemptionEnabledDefault *bool `hcl:"system_scheduler_preemption_enabled_default"` + // DefaultSchedulerConfig configures the initial scheduler config to be persisted in Raft. + // Once the cluster is bootstrapped, and Raft persists the config (from here or through API), + // This value is ignored. + DefaultSchedulerConfig *structs.SchedulerConfiguration `hcl:"default_scheduler_config"` // ExtraKeysHCL is used by hcl to surface unexpected keys ExtraKeysHCL []string `hcl:",unusedKeys" json:"-"` @@ -1340,6 +1342,11 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig { result.ServerJoin = result.ServerJoin.Merge(b.ServerJoin) } + if b.DefaultSchedulerConfig != nil { + c := *b.DefaultSchedulerConfig + result.DefaultSchedulerConfig = &c + } + // Add the schedulers result.EnabledSchedulers = append(result.EnabledSchedulers, b.EnabledSchedulers...) diff --git a/nomad/config.go b/nomad/config.go index 67e02a19c33..1460b17fab7 100644 --- a/nomad/config.go +++ b/nomad/config.go @@ -305,8 +305,10 @@ type Config struct { // dead servers. AutopilotInterval time.Duration - // SystemSchedulerPreemptionEnabledDefault is used to determin whether to enable system preemption by default in a new cluster - SystemSchedulerPreemptionEnabledDefault bool + // DefaultSchedulerConfig configures the initial scheduler config to be persisted in Raft. + // Once the cluster is bootstrapped, and Raft persists the config (from here or through API), + // This value is ignored. + DefaultSchedulerConfig structs.SchedulerConfiguration `hcl:"default_scheduler_config"` // PluginLoader is used to load plugins. PluginLoader loader.PluginCatalog @@ -380,9 +382,15 @@ func DefaultConfig() *Config { MaxTrailingLogs: 250, ServerStabilizationTime: 10 * time.Second, }, - ServerHealthInterval: 2 * time.Second, - AutopilotInterval: 10 * time.Second, - SystemSchedulerPreemptionEnabledDefault: true, + ServerHealthInterval: 2 * time.Second, + AutopilotInterval: 10 * time.Second, + DefaultSchedulerConfig: structs.SchedulerConfiguration{ + PreemptionConfig: structs.PreemptionConfig{ + SystemSchedulerEnabled: true, + BatchSchedulerEnabled: false, + ServiceSchedulerEnabled: false, + }, + }, } // Enable all known schedulers by default diff --git a/nomad/leader.go b/nomad/leader.go index 268b3a6ee95..f92ea0d63fc 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -44,17 +44,6 @@ var minAutopilotVersion = version.Must(version.NewVersion("0.8.0")) var minSchedulerConfigVersion = version.Must(version.NewVersion("0.9.0")) -// Default configuration for scheduler with preemption enabled for system jobs -func (s *Server) defaultSchedulerConfig() structs.SchedulerConfiguration { - return structs.SchedulerConfiguration{ - PreemptionConfig: structs.PreemptionConfig{ - SystemSchedulerEnabled: s.config.SystemSchedulerPreemptionEnabledDefault, - BatchSchedulerEnabled: false, - ServiceSchedulerEnabled: false, - }, - } -} - // monitorLeadership is used to monitor if we acquire or lose our role // as the leader in the Raft cluster. There is some work the leader is // expected to do, so we must react to changes @@ -1321,7 +1310,7 @@ func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration { return nil } - req := structs.SchedulerSetConfigRequest{Config: s.defaultSchedulerConfig()} + req := structs.SchedulerSetConfigRequest{Config: s.config.DefaultSchedulerConfig} if _, _, err = s.raftApply(structs.SchedulerConfigRequestType, req); err != nil { s.logger.Named("core").Error("failed to initialize config", "error", err) return nil diff --git a/nomad/structs/operator.go b/nomad/structs/operator.go index 9966a5e6392..978d5304f4e 100644 --- a/nomad/structs/operator.go +++ b/nomad/structs/operator.go @@ -124,7 +124,7 @@ type AutopilotConfig struct { type SchedulerConfiguration struct { // PreemptionConfig specifies whether to enable eviction of lower // priority jobs to place higher priority jobs. - PreemptionConfig PreemptionConfig + PreemptionConfig PreemptionConfig `hcl:"preemption_config"` // CreateIndex/ModifyIndex store the create/modify indexes of this configuration. CreateIndex uint64 @@ -152,13 +152,13 @@ type SchedulerSetConfigurationResponse struct { // PreemptionConfig specifies whether preemption is enabled based on scheduler type type PreemptionConfig struct { // SystemSchedulerEnabled specifies if preemption is enabled for system jobs - SystemSchedulerEnabled bool + SystemSchedulerEnabled bool `hcl:"system_scheduler_enabled"` // BatchSchedulerEnabled specifies if preemption is enabled for batch jobs - BatchSchedulerEnabled bool + BatchSchedulerEnabled bool `hcl:"batch_scheduler_enabled"` // ServiceSchedulerEnabled specifies if preemption is enabled for service jobs - ServiceSchedulerEnabled bool + ServiceSchedulerEnabled bool `hcl:"service_Scheduler_enabled"` } // SchedulerSetConfigRequest is used by the Operator endpoint to update the