Skip to content

Commit

Permalink
pmd: add dpdk threads context switches metrics
Browse files Browse the repository at this point in the history
The PMD threads should avoid context switching as much as possible. For
vhost-user, they must do so from time to time, since the virtio side
expects a kick on a file descriptor, which requires calling write() on it.

However, non-voluntary context switches should *NEVER* happen. When they
do, it means there is an issue with the isolation of the CPUs dedicated to
OVS DPDK.

Add two new metrics to expose both the voluntary and non-voluntary
context switches occurring on OVS DPDK PMD threads.

Signed-off-by: Robin Jarry <[email protected]>
  • Loading branch information
rjarry authored and atyronesmith committed Jan 8, 2025
1 parent 8570441 commit c4a0667
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 4 deletions.
115 changes: 111 additions & 4 deletions collectors/pmd_rxq/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ package pmd_rxq

import (
"bufio"
"errors"
"io"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
Expand Down Expand Up @@ -43,9 +47,7 @@ var (
)

func (Collector) Collect(ch chan<- prometheus.Metric) {
if !config.MetricSets().Has(config.METRICS_PERF) {
return
}
stats := getVswitchdPmdStat()

buf := appctl.OvsVSwitchd("dpif-netdev/pmd-rxq-show")
if buf == "" {
Expand All @@ -59,7 +61,7 @@ func (Collector) Collect(ch chan<- prometheus.Metric) {
for scanner.Scan() {
line := scanner.Text()

if numa != "" && cpu != "" {
if numa != "" && cpu != "" && config.MetricSets().Has(config.METRICS_PERF) {
var val float64
var err error

Expand Down Expand Up @@ -105,6 +107,111 @@ func (Collector) Collect(ch chan<- prometheus.Metric) {
if match != nil {
numa = match[1]
cpu = match[2]
c, _ := strconv.ParseUint(cpu, 10, 64)
stat, ok := stats[c]
if !ok {
continue
}
if config.MetricSets().Has(ctxtSwitchesMetric.Set) {
ch <- prometheus.MustNewConstMetric(
ctxtSwitchesMetric.Desc(),
ctxtSwitchesMetric.ValueType,
float64(stat.ctxSwitches), numa, cpu)
}
if config.MetricSets().Has(nonVolCtxtSwitchesMetric.Set) {
ch <- prometheus.MustNewConstMetric(
nonVolCtxtSwitchesMetric.Desc(),
nonVolCtxtSwitchesMetric.ValueType,
float64(stat.nonVolCtxSwitches), numa, cpu)
}
}
}
}

// pmdstat holds the values extracted from the /proc/<pid>/task/<tid>/status
// file of a single OVS PMD thread.
type pmdstat struct {
	// name is the thread name from the "Name:" line (always starts with "pmd-c").
	name string
	// cpuAffinity is the single CPU listed in "Cpus_allowed_list:".
	cpuAffinity uint64
	// numaAffinity is the node listed in "Mems_allowed_list:" when that field
	// is a single number; it stays 0 when the field is a list or a range.
	numaAffinity uint64
	// ctxSwitches is the "voluntary_ctxt_switches:" counter.
	ctxSwitches uint64
	// nonVolCtxSwitches is the "nonvoluntary_ctxt_switches:" counter.
	nonVolCtxSwitches uint64
}

// notPmdErr is returned by parseStatus when the status file does not belong
// to a PMD thread.
var notPmdErr = errors.New("not a pmd thread")

// parseStatus reads the procfs status file at path and extracts the PMD
// thread statistics from it.
//
// It returns notPmdErr when the thread name does not start with "pmd-c" or
// when no thread name is found at all. Any other non-nil error means the file
// could not be read, or that the CPU affinity or one of the context switch
// counters is not a single unsigned integer. On error, the returned pmdstat is
// always the zero value.
func parseStatus(path string) (pmdstat, error) {
	f, err := os.Open(path)
	if err != nil {
		return pmdstat{}, err
	}
	defer f.Close()

	var stat pmdstat
	scanner := bufio.NewScanner(f)

	for scanner.Scan() {
		tokens := strings.Fields(scanner.Text())
		if len(tokens) < 2 {
			continue
		}
		key := tokens[0]

		if key == "Name:" {
			// Thread names may contain spaces, so the name is everything
			// after the key. Rejecting here avoids parsing the remaining
			// fields of threads we are not interested in.
			name := strings.Join(tokens[1:], " ")
			if !strings.HasPrefix(name, "pmd-c") {
				return pmdstat{}, notPmdErr
			}
			stat.name = name
			continue
		}

		// All other fields of interest are single-valued.
		if len(tokens) != 2 {
			continue
		}
		value := tokens[1]

		switch key {
		case "Cpus_allowed_list:":
			// The result is keyed by CPU by the caller: a range or list
			// must not silently collapse to CPU 0.
			if stat.cpuAffinity, err = strconv.ParseUint(value, 10, 64); err != nil {
				return pmdstat{}, err
			}
		case "Mems_allowed_list:":
			// Best effort on purpose: this field is commonly a range such as
			// "0-1" on multi-NUMA hosts, which must not invalidate the
			// counters of the thread.
			stat.numaAffinity, _ = strconv.ParseUint(value, 10, 64)
		case "voluntary_ctxt_switches:":
			if stat.ctxSwitches, err = strconv.ParseUint(value, 10, 64); err != nil {
				return pmdstat{}, err
			}
		case "nonvoluntary_ctxt_switches:":
			if stat.nonVolCtxSwitches, err = strconv.ParseUint(value, 10, 64); err != nil {
				return pmdstat{}, err
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return pmdstat{}, err
	}
	if stat.name == "" {
		// No usable "Name:" line was seen, so this cannot be identified as a
		// PMD thread.
		return pmdstat{}, notPmdErr
	}

	return stat, nil
}

// getVswitchdPmdStat collects the status of every PMD thread of the running
// ovs-vswitchd process.
//
// The process ID is read from "ovs-vswitchd.pid" in the directory returned by
// config.OvsRundir(), then each directory under /proc/<pid>/task is handed to
// parseStatus. The result is keyed by the cpuAffinity of each thread; if two
// threads report the same value, the one listed last wins. Threads rejected
// with notPmdErr are skipped silently, other failures are logged and skipped.
//
// A nil map is returned when the pidfile or the task directory cannot be read.
func getVswitchdPmdStat() map[uint64]pmdstat {
	pidfile := filepath.Join(config.OvsRundir(), "ovs-vswitchd.pid")

	fd, err := os.Open(pidfile)
	if err != nil {
		log.Errf("open(%s): %s", pidfile, err)
		return nil
	}
	defer fd.Close()

	raw, err := io.ReadAll(fd)
	if err != nil {
		log.Errf("read(%s): %s", pidfile, err)
		return nil
	}

	pid := strings.TrimSpace(string(raw))
	tasks := filepath.Join("/proc", pid, "task")

	threads, err := os.ReadDir(tasks)
	if err != nil {
		log.Errf("readdir(%s): %s", tasks, err)
		return nil
	}

	result := make(map[uint64]pmdstat)

	for _, thread := range threads {
		if !thread.IsDir() {
			continue
		}
		tid := thread.Name()

		st, err := parseStatus(filepath.Join(tasks, tid, "status"))
		switch {
		case err == nil:
			result[st.cpuAffinity] = st
		case !errors.Is(err, notPmdErr):
			// Not being a PMD thread is expected for most tasks; only
			// report genuine failures.
			log.Errf("status(%s): %s", tid, err)
		}
	}

	return result
}
16 changes: 16 additions & 0 deletions collectors/pmd_rxq/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@ var overheadMetric = lib.Metric{
Set: config.METRICS_PERF,
}

// ctxtSwitchesMetric describes the counter of voluntary context switches of
// a PMD thread, labelled by NUMA node and CPU. It belongs to the
// METRICS_PERF metric set.
var ctxtSwitchesMetric = lib.Metric{
Name: "ovs_pmd_context_switches",
Description: "Number of voluntary context switches per PMD thread.",
ValueType: prometheus.CounterValue,
Labels: []string{"numa", "cpu"},
Set: config.METRICS_PERF,
}

// nonVolCtxtSwitchesMetric describes the counter of non-voluntary context
// switches of a PMD thread, labelled by NUMA node and CPU. Unlike the
// voluntary counter, it belongs to the METRICS_ERRORS metric set.
var nonVolCtxtSwitchesMetric = lib.Metric{
Name: "ovs_pmd_nonvol_context_switches",
Description: "Number of non-voluntary context switches per PMD thread.",
ValueType: prometheus.CounterValue,
Labels: []string{"numa", "cpu"},
Set: config.METRICS_ERRORS,
}

var usageMetric = lib.Metric{
Name: "ovs_pmd_rxq_usage",
Description: "Percentage of CPU cycles used to process packets from one Rxq.",
Expand Down

0 comments on commit c4a0667

Please sign in to comment.