Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix num cpu #1518

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

### Changes

* [ENHANCEMENT] Add `--collector.perf.cpus` to allow setting the CPU list for perf stats.
* [CHANGE] Add `--collector.netdev.device-whitelist`. #1279
* [CHANGE] Refactor mdadm collector #1403
* [CHANGE] Add `mountaddr` label to NFS metrics. #1417
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,13 @@ Depending on the configured value different metrics will be available, for most
cases `0` will provide the most complete set. For more information see [`man 2
perf_event_open`](http://man7.org/linux/man-pages/man2/perf_event_open.2.html).

By default, the perf collector will only collect metrics of the CPUs that
`node_exporter` can run on. If this is insufficient (e.g. if you run `node_exporter` with
its CPU affinity set to specific CPUs) You can specify a list of alternate CPUs by using the
`--collector.perf.cpus` flag. For example, to collect metrics on CPUs 2-6, you
would specify: `--collector.perf --collector.perf.cpus=2-6`.
jdamato-fsly marked this conversation as resolved.
Show resolved Hide resolved


Name | Description | OS
---------|-------------|----
buddyinfo | Exposes statistics of memory fragments as reported by /proc/buddyinfo. | Linux
Expand Down
80 changes: 66 additions & 14 deletions collector/perf_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,25 @@ package collector

import (
"fmt"
"runtime"

perf "github.com/hodgesds/perf-utils"
"github.com/prometheus/client_golang/prometheus"
kingpin "gopkg.in/alecthomas/kingpin.v2"
"runtime"
"strconv"
"strings"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Normally we group all the built-in imports at the top, can you re-format these using goimports?

go get golang.org/x/tools/cmd/goimports
goimports -w ./collector/perf_linux.go

)

const (
perfSubsystem = "perf"
)

var (
cpus = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String()
hwProfilerCpuMap = make(map[*perf.HardwareProfiler]int)
swProfilerCpuMap = make(map[*perf.SoftwareProfiler]int)
cacheProfilerCpuMap = make(map[*perf.CacheProfiler]int)
jdamato-fsly marked this conversation as resolved.
Show resolved Hide resolved
)

func init() {
registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector)
}
Expand All @@ -41,6 +50,14 @@ type perfCollector struct {
desc map[string]*prometheus.Desc
}

func isValidCPUString(cpus *string) bool {
if cpus == nil || *cpus == "" || !strings.Contains(*cpus, "-") || strings.Count(*cpus, "-") != 1 {
return false
}

return true
}

// NewPerfCollector returns a new perf based collector, it creates a profiler
// per CPU.
func NewPerfCollector() (Collector, error) {
Expand All @@ -49,23 +66,55 @@ func NewPerfCollector() (Collector, error) {
perfSwProfilers: map[int]perf.SoftwareProfiler{},
perfCacheProfilers: map[int]perf.CacheProfiler{},
}
ncpus := runtime.NumCPU()
for i := 0; i < ncpus; i++ {

start := 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use a var block here:

var (
  start = 0
  ncpus = 0
  ..
)

ncpus := 0
var err error

if !isValidCPUString(cpus) {
jdamato-fsly marked this conversation as resolved.
Show resolved Hide resolved
start = 0
ncpus = runtime.NumCPU() - 1
} else {
cpu_range := strings.Split(*cpus, "-")
jdamato-fsly marked this conversation as resolved.
Show resolved Hide resolved
start, err = strconv.Atoi(cpu_range[0])
if err != nil {
start = 0
}

ncpus, err = strconv.Atoi(cpu_range[1])
if err != nil {
ncpus = runtime.NumCPU() - 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should fallback to the default when a user provided a non-integer. This should fail instead.

}
}

for i, idx := start, 0; i <= ncpus; i, idx = i+1, idx+1 {
// Use -1 to profile all processes on the CPU, see:
// man perf_event_open
collector.perfHwProfilers[i] = perf.NewHardwareProfiler(-1, i)
if err := collector.perfHwProfilers[i].Start(); err != nil {
p := perf.NewHardwareProfiler(-1, i)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's rename the p's here:

  • ph = hardware profiler
  • ps = software profiler
  • pc = cache profiler

collector.perfHwProfilers[idx] = p
if err := collector.perfHwProfilers[idx].Start(); err != nil {
return collector, err
} else {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for the else here, if err != nil it returns anyway.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(same below)

hwProfilerCpuMap[&p] = i
}
collector.perfSwProfilers[i] = perf.NewSoftwareProfiler(-1, i)

p2 := perf.NewSoftwareProfiler(-1, i)
collector.perfSwProfilers[i] = p2
if err := collector.perfSwProfilers[i].Start(); err != nil {
return collector, err
} else {
swProfilerCpuMap[&p2] = i
}
collector.perfCacheProfilers[i] = perf.NewCacheProfiler(-1, i)

p3 := perf.NewCacheProfiler(-1, i)
collector.perfCacheProfilers[i] = p3
if err := collector.perfCacheProfilers[i].Start(); err != nil {
return collector, err
} else {
cacheProfilerCpuMap[&p3] = i
}
}

collector.desc = map[string]*prometheus.Desc{
"cpucycles_total": prometheus.NewDesc(
prometheus.BuildFQName(
Expand Down Expand Up @@ -330,8 +379,9 @@ func (c *perfCollector) Update(ch chan<- prometheus.Metric) error {
}

func (c *perfCollector) updateHardwareStats(ch chan<- prometheus.Metric) error {
for cpu, profiler := range c.perfHwProfilers {
cpuStr := fmt.Sprintf("%d", cpu)
for _, profiler := range c.perfHwProfilers {
cpuid := hwProfilerCpuMap[&profiler]
cpuStr := fmt.Sprintf("%d", cpuid)
hwProfile, err := profiler.Profile()
if err != nil {
return err
Expand Down Expand Up @@ -401,8 +451,9 @@ func (c *perfCollector) updateHardwareStats(ch chan<- prometheus.Metric) error {
}

func (c *perfCollector) updateSoftwareStats(ch chan<- prometheus.Metric) error {
for cpu, profiler := range c.perfSwProfilers {
cpuStr := fmt.Sprintf("%d", cpu)
for _, profiler := range c.perfSwProfilers {
cpuid := swProfilerCpuMap[&profiler]
cpuStr := fmt.Sprintf("%d", cpuid)
swProfile, err := profiler.Profile()
if err != nil {
return err
Expand Down Expand Up @@ -456,8 +507,9 @@ func (c *perfCollector) updateSoftwareStats(ch chan<- prometheus.Metric) error {
}

func (c *perfCollector) updateCacheStats(ch chan<- prometheus.Metric) error {
for cpu, profiler := range c.perfCacheProfilers {
cpuStr := fmt.Sprintf("%d", cpu)
for _, profiler := range c.perfCacheProfilers {
cpuid := cacheProfilerCpuMap[&profiler]
cpuStr := fmt.Sprintf("%d", cpuid)
cacheProfile, err := profiler.Profile()
if err != nil {
return err
Expand Down