Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use cpu_total_compute configuration for CPU usage too #17628

Merged
merged 1 commit into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/17628.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
cpustats: Use config "cpu_total_compute" (if set) for all CPU statistics
```
15 changes: 7 additions & 8 deletions client/fingerprint/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ import (
"github.com/hashicorp/nomad/lib/cpuset"

"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/client/stats"
shelpers "github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/nomad/structs"
)

Expand Down Expand Up @@ -42,7 +43,7 @@ func NewCPUFingerprint(logger hclog.Logger) Fingerprint {
}

func (f *CPUFingerprint) Fingerprint(request *FingerprintRequest, response *FingerprintResponse) error {
TrueBrain marked this conversation as resolved.
Show resolved Hide resolved
f.initialize()
f.initialize(request)

f.setModelName(response)

Expand All @@ -61,8 +62,8 @@ func (f *CPUFingerprint) Fingerprint(request *FingerprintRequest, response *Fing
return nil
}

func (f *CPUFingerprint) initialize() {
if err := stats.Init(); err != nil {
func (f *CPUFingerprint) initialize(request *FingerprintRequest) {
if err := stats.Init(uint64(request.Config.CpuCompute)); err != nil {
f.logger.Warn("failed initializing stats collector", "error", err)
}
}
Expand Down Expand Up @@ -134,10 +135,8 @@ func (f *CPUFingerprint) setReservableCores(request *FingerprintRequest, respons
func (f *CPUFingerprint) setTotalCompute(request *FingerprintRequest, response *FingerprintResponse) {
var ticks uint64
switch {
case request.Config.CpuCompute > 0:
ticks = uint64(request.Config.CpuCompute)
case stats.TotalTicksAvailable() > 0:
ticks = stats.TotalTicksAvailable()
case shelpers.CpuTotalTicks() > 0:
ticks = shelpers.CpuTotalTicks()
default:
ticks = defaultCPUTicks
}
Expand Down
2 changes: 2 additions & 0 deletions client/fingerprint/env_aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
log "github.com/hashicorp/go-hclog"

cleanhttp "github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/nomad/structs"
)

Expand Down Expand Up @@ -185,6 +186,7 @@ func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *F
if ticks := specs.Ticks(); request.Config.CpuCompute <= 0 {
response.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", ticks))
f.logger.Debug("setting ec2 cpu", "ticks", ticks)
stats.SetCpuTotalTicks(uint64(ticks))
resources = new(structs.Resources)
resources.CPU = ticks
if nodeResources == nil {
Expand Down
2 changes: 1 addition & 1 deletion client/lib/resources/pid.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
package resources

import (
"github.com/hashicorp/nomad/client/stats"
"github.com/hashicorp/nomad/helper/stats"
)

// PIDs holds all of a task's pids and their cpu percentage calculators
Expand Down
139 changes: 95 additions & 44 deletions client/stats/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,64 +4,115 @@
package stats

import (
"runtime"
"context"
"errors"
"fmt"
"sync"
"time"

shelpers "github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/helper/stats"
"github.com/shirou/gopsutil/v3/cpu"
"github.com/shoenig/go-m1cpu"
)

// CpuStats calculates cpu usage percentage
type CpuStats struct {
prevCpuTime float64
prevTime time.Time
const (
// cpuInfoTimeout is the timeout used when gathering CPU info. This is used
// to override the default timeout in gopsutil which has a tendency to
// timeout on Windows.
cpuInfoTimeout = 60 * time.Second
)

totalCpus int
}
var (
cpuPowerCoreCount int
cpuPowerCoreMHz uint64
cpuEfficiencyCoreCount int
cpuEfficiencyCoreMHz uint64
cpuModelName string
)

// NewCpuStats returns a cpu stats calculator
func NewCpuStats() *CpuStats {
numCpus := runtime.NumCPU()
cpuStats := &CpuStats{
totalCpus: numCpus,
}
return cpuStats
}
var (
detectedCpuTotalTicks uint64
initErr error
onceLer sync.Once
)

func Init(configCpuTotalCompute uint64) error {
onceLer.Do(func() {
switch {
case m1cpu.IsAppleSilicon():
cpuModelName = m1cpu.ModelName()
cpuPowerCoreCount = m1cpu.PCoreCount()
cpuPowerCoreMHz = m1cpu.PCoreHz() / 1_000_000
cpuEfficiencyCoreCount = m1cpu.ECoreCount()
cpuEfficiencyCoreMHz = m1cpu.ECoreHz() / 1_000_000
bigTicks := uint64(cpuPowerCoreCount) * cpuPowerCoreMHz
littleTicks := uint64(cpuEfficiencyCoreCount) * cpuEfficiencyCoreMHz
detectedCpuTotalTicks = bigTicks + littleTicks
default:
// for now, all other cpu types assume only power cores
// todo: this is already not true for Intel 13th generation

var err error
if cpuPowerCoreCount, err = cpu.Counts(true); err != nil {
initErr = errors.Join(initErr, fmt.Errorf("failed to detect number of CPU cores: %w", err))
}

ctx, cancel := context.WithTimeout(context.Background(), cpuInfoTimeout)
defer cancel()

// Percent calculates the cpu usage percentage based on the current cpu usage
// and the previous cpu usage where usage is given as time in nanoseconds spend
// in the cpu
func (c *CpuStats) Percent(cpuTime float64) float64 {
now := time.Now()

if c.prevCpuTime == 0.0 {
// invoked first time
c.prevCpuTime = cpuTime
c.prevTime = now
return 0.0
var cpuInfoStats []cpu.InfoStat
if cpuInfoStats, err = cpu.InfoWithContext(ctx); err != nil {
initErr = errors.Join(initErr, fmt.Errorf("Unable to obtain CPU information: %w", err))
}

for _, infoStat := range cpuInfoStats {
cpuModelName = infoStat.ModelName
if uint64(infoStat.Mhz) > cpuPowerCoreMHz {
cpuPowerCoreMHz = uint64(infoStat.Mhz)
}
}

// compute ticks using only power core, until we add support for
// detecting little cores on non-apple platforms
detectedCpuTotalTicks = uint64(cpuPowerCoreCount) * cpuPowerCoreMHz

initErr = err
}

stats.SetCpuTotalTicks(detectedCpuTotalTicks)
})

// override the computed value with the config value if it is set
if configCpuTotalCompute > 0 {
stats.SetCpuTotalTicks(configCpuTotalCompute)
}

timeDelta := now.Sub(c.prevTime).Nanoseconds()
ret := c.calculatePercent(c.prevCpuTime, cpuTime, timeDelta)
c.prevCpuTime = cpuTime
c.prevTime = now
return ret
return initErr
}

// TicksConsumed calculates the total ticks consumes by the process across all
// cpu cores
func (c *CpuStats) TicksConsumed(percent float64) float64 {
return (percent / 100) * float64(shelpers.TotalTicksAvailable()) / float64(c.totalCpus)
// CPUNumCores returns the number of CPU cores available.
//
// This is represented with two values - (Power (P), Efficiency (E)) so we can
// correctly compute total compute for processors with asymetric cores such as
// Apple Silicon.
//
// For platforms with symetric cores (or where we do not correcly detect asymetric
// cores), all cores are presented as P cores.
func CPUNumCores() (int, int) {
return cpuPowerCoreCount, cpuEfficiencyCoreCount
}

func (c *CpuStats) calculatePercent(t1, t2 float64, timeDelta int64) float64 {
vDelta := t2 - t1
if timeDelta <= 0 || vDelta <= 0.0 {
return 0.0
}
// CPUMHzPerCore returns the MHz per CPU (P, E) core type.
//
// As with CPUNumCores, asymetric core detection currently only works with
// Apple Silicon CPUs.
func CPUMHzPerCore() (uint64, uint64) {
return cpuPowerCoreMHz, cpuEfficiencyCoreMHz
}

overall_percent := (vDelta / float64(timeDelta)) * 100.0
return overall_percent
// CPUModelName returns the model name of the CPU.
func CPUModelName() string {
return cpuModelName
}

func (h *HostStatsCollector) collectCPUStats() (cpus []*CPUStats, totalTicks float64, err error) {
Expand All @@ -79,7 +130,7 @@ func (h *HostStatsCollector) collectCPUStats() (cpus []*CPUStats, totalTicks flo
h.statsCalculator[cpuStat.CPU] = percentCalculator
}
idle, user, system, total := percentCalculator.Calculate(cpuStat)
ticks := (total / 100.0) * (float64(shelpers.TotalTicksAvailable()) / float64(len(cpuStats)))
ticks := (total / 100.0) * (float64(stats.CpuTotalTicks()) / float64(len(cpuStats)))
cs[idx] = &CPUStats{
CPU: cpuStat.CPU,
User: user,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func TestCPU_CPUModelName(t *testing.T) {
must.NotEq(t, "", name)
}

func TestCPU_CPUTotalTicksAvailable(t *testing.T) {
ticks := TotalTicksAvailable()
func TestCPU_CPUCpuTotalTicks(t *testing.T) {
ticks := CpuTotalTicks()
must.Positive(t, ticks)
}
17 changes: 1 addition & 16 deletions client/stats/cpu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,17 @@ import (
"math"
"os"
"testing"
"time"

"github.com/hashicorp/nomad/ci"
shelpers "github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/stretchr/testify/assert"
)

func TestCpuStatsPercent(t *testing.T) {
ci.Parallel(t)

cs := NewCpuStats()
cs.Percent(79.7)
time.Sleep(1 * time.Second)
percent := cs.Percent(80.69)
expectedPercent := 98.00
if percent < expectedPercent && percent > (expectedPercent+1.00) {
t.Fatalf("expected: %v, actual: %v", expectedPercent, percent)
}
}

func TestHostStats_CPU(t *testing.T) {
ci.Parallel(t)

assert := assert.New(t)
assert.Nil(shelpers.Init())
assert.Nil(Init(0))

logger := testlog.HCLogger(t)
cwd, err := os.Getwd()
Expand Down
2 changes: 1 addition & 1 deletion drivers/docker/util/stats_posix.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func DockerStatsToTaskResourceUsage(s *docker.Stats) *cstructs.TaskResourceUsage
cs.UserMode = CalculateCPUPercent(
s.CPUStats.CPUUsage.UsageInUsermode, s.PreCPUStats.CPUUsage.UsageInUsermode,
s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, runtime.NumCPU())
cs.TotalTicks = (cs.Percent / 100) * float64(stats.TotalTicksAvailable()) / float64(runtime.NumCPU())
cs.TotalTicks = (cs.Percent / 100) * float64(stats.CpuTotalTicks()) / float64(runtime.NumCPU())

return &cstructs.TaskResourceUsage{
ResourceUsage: &cstructs.ResourceUsage{
Expand Down
2 changes: 1 addition & 1 deletion drivers/docker/util/stats_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func DockerStatsToTaskResourceUsage(s *docker.Stats) *cstructs.TaskResourceUsage
ThrottledPeriods: s.CPUStats.ThrottlingData.ThrottledPeriods,
ThrottledTime: s.CPUStats.ThrottlingData.ThrottledTime,
Percent: cpuPercent,
TotalTicks: (cpuPercent / 100) * float64(stats.TotalTicksAvailable()) / float64(runtime.NumCPU()),
TotalTicks: (cpuPercent / 100) * float64(stats.CpuTotalTicks()) / float64(runtime.NumCPU()),
Measured: DockerMeasuredCPUStats,
}

Expand Down
9 changes: 3 additions & 6 deletions drivers/shared/executor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ import (
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/lib/fifo"
"github.com/hashicorp/nomad/client/lib/resources"
"github.com/hashicorp/nomad/client/stats"
cstructs "github.com/hashicorp/nomad/client/structs"
shelpers "github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/syndtr/gocapability/capability"
)
Expand Down Expand Up @@ -259,11 +258,9 @@ type UniversalExecutor struct {
}

// NewExecutor returns an Executor
func NewExecutor(logger hclog.Logger) Executor {
func NewExecutor(logger hclog.Logger, cpuTotalTicks uint64) Executor {
logger = logger.Named("executor")
if err := shelpers.Init(); err != nil {
logger.Error("unable to initialize stats", "error", err)
}
stats.SetCpuTotalTicks(cpuTotalTicks)

return &UniversalExecutor{
logger: logger,
Expand Down
4 changes: 2 additions & 2 deletions drivers/shared/executor/executor_basic.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ import (
"github.com/hashicorp/nomad/plugins/drivers"
)

func NewExecutorWithIsolation(logger hclog.Logger) Executor {
func NewExecutorWithIsolation(logger hclog.Logger, cpuTotalTicks uint64) Executor {
logger = logger.Named("executor")
logger.Error("isolation executor is not supported on this platform, using default")
return NewExecutor(logger)
return NewExecutor(logger, cpuTotalTicks)
}

func (e *UniversalExecutor) configureResourceContainer(_ int) error { return nil }
Expand Down
10 changes: 4 additions & 6 deletions drivers/shared/executor/executor_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@ import (
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/lib/cgutil"
"github.com/hashicorp/nomad/client/lib/resources"
"github.com/hashicorp/nomad/client/stats"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/drivers/shared/capabilities"
shelpers "github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
Expand Down Expand Up @@ -71,11 +70,10 @@ type LibcontainerExecutor struct {
exitState *ProcessState
}

func NewExecutorWithIsolation(logger hclog.Logger) Executor {
func NewExecutorWithIsolation(logger hclog.Logger, cpuTotalTicks uint64) Executor {
logger = logger.Named("isolated_executor")
if err := shelpers.Init(); err != nil {
logger.Error("unable to initialize stats", "error", err)
}
stats.SetCpuTotalTicks(cpuTotalTicks)

return &LibcontainerExecutor{
id: strings.ReplaceAll(uuid.Generate(), "-", "_"),
logger: logger,
Expand Down
Loading