From 36dc330737b89296b1df750af06f30a30554fe07 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 14 Mar 2017 12:56:31 -0700 Subject: [PATCH 1/2] Various fixes This PR: * Uses Go 1.8 executable lookup * Stores any error message from the stats init method * Allows overriding of CPU compute for hosts where it can't be detected --- client/config/config.go | 27 +++++++++++++++++ client/driver/executor/executor.go | 3 +- client/fingerprint/cpu.go | 31 +++++++++++++++----- command/agent/agent.go | 3 ++ command/agent/config-test-fixtures/basic.hcl | 1 + command/agent/config.go | 6 ++++ command/agent/config_parse.go | 1 + command/agent/config_parse_test.go | 1 + command/agent/config_test.go | 2 ++ helper/discover/discover.go | 6 ++-- helper/stats/cpu.go | 12 ++++---- 11 files changed, 74 insertions(+), 19 deletions(-) diff --git a/client/config/config.go b/client/config/config.go index 6924fd7229e..24e115f35db 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -87,6 +87,10 @@ type Config struct { // be determined dynamically. NetworkSpeed int + // CpuCompute is the default total CPU compute if it can not be determined + // dynamically. It should be given as Cores * MHz (2 Cores * 2 GHz = 4000) + CpuCompute int + // MaxKillTimeout allows capping the user-specifiable KillTimeout. If the // task's KillTimeout is greater than the MaxKillTimeout, MaxKillTimeout is // used. @@ -242,6 +246,29 @@ func (c *Config) ReadBoolDefault(id string, defaultValue bool) bool { return val } +// ReadInt parses the specified option as an int. +func (c *Config) ReadInt(id string) (int, error) { + val, ok := c.Options[id] + if !ok { + return 0, fmt.Errorf("Specified config is missing from options") + } + ival, err := strconv.Atoi(val) + if err != nil { + return 0, fmt.Errorf("Failed to parse %s as int: %s", val, err) + } + return ival, nil +} + +// ReadIntDefault tries to parse the specified option as an int. If there is +// an error in parsing, the default option is returned.
+func (c *Config) ReadIntDefault(id string, defaultValue int) int { + val, err := c.ReadInt(id) + if err != nil { + return defaultValue + } + return val +} + // ReadDuration parses the specified option as a duration. func (c *Config) ReadDuration(id string) (time.Duration, error) { val, ok := c.Options[id] diff --git a/client/driver/executor/executor.go b/client/driver/executor/executor.go index f5306ed7acd..53b9c37f581 100644 --- a/client/driver/executor/executor.go +++ b/client/driver/executor/executor.go @@ -207,8 +207,7 @@ type UniversalExecutor struct { // NewExecutor returns an Executor func NewExecutor(logger *log.Logger) Executor { if err := shelpers.Init(); err != nil { - logger.Printf("[FATAL] executor: unable to initialize stats: %v", err) - return nil + logger.Printf("[ERR] executor: unable to initialize stats: %v", err) } exec := &UniversalExecutor{ diff --git a/client/fingerprint/cpu.go b/client/fingerprint/cpu.go index 00f9b848c28..0aef536a1de 100644 --- a/client/fingerprint/cpu.go +++ b/client/fingerprint/cpu.go @@ -22,8 +22,29 @@ func NewCPUFingerprint(logger *log.Logger) Fingerprint { } func (f *CPUFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { + setResources := func(totalCompute int) { + if node.Resources == nil { + node.Resources = &structs.Resources{} + } + + node.Resources.CPU = totalCompute + } + if err := stats.Init(); err != nil { - return false, fmt.Errorf("Unable to obtain CPU information: %v", err) + err := fmt.Errorf("Unable to obtain CPU information: %v", err) + + if cfg.CpuCompute != 0 { + f.logger.Printf("[DEBUG] fingerprint.cpu: %v. 
Using specified cpu compute %d", err, cfg.CpuCompute) + setResources(cfg.CpuCompute) + return true, nil + } + + f.logger.Printf("[ERR] fingerprint.cpu: %v", err) + f.logger.Printf("[INFO] fingerprint.cpu: cpu compute may be set manually"+ + " using the client config option %q on machines where cpu information"+ + " can not be automatically detected.", "cpu_compute") + + return false, err } modelName := stats.CPUModelName() @@ -40,13 +61,9 @@ func (f *CPUFingerprint) Fingerprint(cfg *config.Config, node *structs.Node) (bo f.logger.Printf("[DEBUG] fingerprint.cpu: core count: %d", numCores) tt := stats.TotalTicksAvailable() - node.Attributes["cpu.totalcompute"] = fmt.Sprintf("%.0f", tt) - if node.Resources == nil { - node.Resources = &structs.Resources{} - } - - node.Resources.CPU = int(tt) + node.Attributes["cpu.totalcompute"] = fmt.Sprintf("%.0f", tt) + setResources(int(tt)) return true, nil } diff --git a/command/agent/agent.go b/command/agent/agent.go index 4c9ca7d0073..142dfe2e34e 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -259,6 +259,9 @@ func (a *Agent) clientConfig() (*clientconfig.Config, error) { if a.config.Client.NetworkSpeed != 0 { conf.NetworkSpeed = a.config.Client.NetworkSpeed } + if a.config.Client.CpuCompute != 0 { + conf.CpuCompute = a.config.Client.CpuCompute + } if a.config.Client.MaxKillTimeout != "" { dur, err := time.ParseDuration(a.config.Client.MaxKillTimeout) if err != nil { diff --git a/command/agent/config-test-fixtures/basic.hcl b/command/agent/config-test-fixtures/basic.hcl index 28d71e64a8c..bd9bcbe6a4f 100644 --- a/command/agent/config-test-fixtures/basic.hcl +++ b/command/agent/config-test-fixtures/basic.hcl @@ -39,6 +39,7 @@ client { } network_interface = "eth0" network_speed = 100 + cpu_compute = 4444 reserved { cpu = 10 memory = 10 diff --git a/command/agent/config.go b/command/agent/config.go index ce14d03b63a..0528d8796f0 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -182,6 +182,9 
@@ type ClientConfig struct { // speed. NetworkSpeed int `mapstructure:"network_speed"` + // CpuCompute is the total CPU compute to use when it can not be detected. + CpuCompute int `mapstructure:"cpu_compute"` + // MaxKillTimeout allows capping the user-specifiable KillTimeout. MaxKillTimeout string `mapstructure:"max_kill_timeout"` @@ -916,6 +919,9 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig { if b.NetworkSpeed != 0 { result.NetworkSpeed = b.NetworkSpeed } + if b.CpuCompute != 0 { + result.CpuCompute = b.CpuCompute + } if b.MaxKillTimeout != "" { result.MaxKillTimeout = b.MaxKillTimeout } diff --git a/command/agent/config_parse.go b/command/agent/config_parse.go index f2bda68f149..10516a845ce 100644 --- a/command/agent/config_parse.go +++ b/command/agent/config_parse.go @@ -336,6 +336,7 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error { "chroot_env", "network_interface", "network_speed", + "cpu_compute", "max_kill_timeout", "client_max_port", "client_min_port", diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go index db5dca3319e..19fccdf64a6 100644 --- a/command/agent/config_parse_test.go +++ b/command/agent/config_parse_test.go @@ -59,6 +59,7 @@ func TestConfig_Parse(t *testing.T) { }, NetworkInterface: "eth0", NetworkSpeed: 100, + CpuCompute: 4444, MaxKillTimeout: "10s", ClientMinPort: 1000, ClientMaxPort: 2000, diff --git a/command/agent/config_test.go b/command/agent/config_test.go index ae5f5653398..d28ef336f1b 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -76,6 +76,7 @@ func TestConfig_Merge(t *testing.T) { "foo": "bar", }, NetworkSpeed: 100, + CpuCompute: 100, MaxKillTimeout: "20s", ClientMaxPort: 19996, Reserved: &Resources{ @@ -202,6 +203,7 @@ func TestConfig_Merge(t *testing.T) { ClientMaxPort: 20000, ClientMinPort: 22000, NetworkSpeed: 105, + CpuCompute: 105, MaxKillTimeout: "50s", Reserved: &Resources{ CPU: 15, diff --git
a/helper/discover/discover.go b/helper/discover/discover.go index 8582a01330d..ec79c3c4c63 100644 --- a/helper/discover/discover.go +++ b/helper/discover/discover.go @@ -6,8 +6,6 @@ import ( "os/exec" "path/filepath" "runtime" - - "github.com/kardianos/osext" ) // Checks the current executable, then $GOPATH/bin, and finally the CWD, in that @@ -19,12 +17,12 @@ func NomadExecutable() (string, error) { } // Check the current executable. - bin, err := osext.Executable() + bin, err := os.Executable() if err != nil { return "", fmt.Errorf("Failed to determine the nomad executable: %v", err) } - if filepath.Base(bin) == nomadExe { + if _, err := os.Stat(bin); err == nil { return bin, nil } diff --git a/helper/stats/cpu.go b/helper/stats/cpu.go index 9c0cd72d839..7326deda6e4 100644 --- a/helper/stats/cpu.go +++ b/helper/stats/cpu.go @@ -14,20 +14,20 @@ var ( cpuNumCores int cpuTotalTicks float64 + initErr error onceLer sync.Once ) func Init() error { - var err error onceLer.Do(func() { - if cpuNumCores, err = cpu.Counts(true); err != nil { - err = fmt.Errorf("Unable to determine the number of CPU cores available: %v", err) + if cpuNumCores, initErr = cpu.Counts(true); initErr != nil { + initErr = fmt.Errorf("Unable to determine the number of CPU cores available: %v", initErr) return } var cpuInfo []cpu.InfoStat - if cpuInfo, err = cpu.Info(); err != nil { - err = fmt.Errorf("Unable to obtain CPU information: %v", err) + if cpuInfo, initErr = cpu.Info(); initErr != nil { + initErr = fmt.Errorf("Unable to obtain CPU information: %v", initErr) return } @@ -42,7 +42,7 @@ func Init() error { cpuMhzPerCore = math.Floor(cpuMhzPerCore) cpuTotalTicks = math.Floor(float64(cpuNumCores) * cpuMhzPerCore) }) - return err + return initErr } // CPUModelName returns the number of CPU cores available From a6067d5667b3905bb6da295c091a216728b92253 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 14 Mar 2017 13:41:57 -0700 Subject: [PATCH 2/2] Docker doesn't need to init the stats helper 
--- client/driver/docker.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/client/driver/docker.go b/client/driver/docker.go index a65d885f366..870c690695e 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -612,11 +612,6 @@ func (d *DockerDriver) dockerClients() (*docker.Client, *docker.Client, error) { var err error var merr multierror.Error createClients.Do(func() { - if err = shelpers.Init(); err != nil { - d.logger.Printf("[FATAL] driver.docker: unable to initialize stats: %v", err) - return - } - // Default to using whatever is configured in docker.endpoint. If this is // not specified we'll fall back on NewClientFromEnv which reads config from // the DOCKER_* environment variables DOCKER_HOST, DOCKER_TLS_VERIFY, and