From 642c0e05db7af0daf676bb2d24efca3a2b9da68d Mon Sep 17 00:00:00 2001 From: Benjamin Drung Date: Thu, 13 Jun 2019 19:26:58 +0200 Subject: [PATCH] Add support for InfiniBand The Prometheus node exporter collects metrics for the InfiniBand network protocol including the amount of packets sent and received, the number of times the link has been downed and how many times the link has recovered from an error state. Reading all those information from sysfs is better placed in the procfs library. Also collect the state, physical state, and the rate. Signed-off-by: Benjamin Drung --- fixtures.ttar | 226 ++++++++++++++++++++ internal/util/valueparser.go | 19 ++ sysfs/class_infiniband.go | 372 +++++++++++++++++++++++++++++++++ sysfs/class_infiniband_test.go | 139 ++++++++++++ 4 files changed, 756 insertions(+) create mode 100644 sysfs/class_infiniband.go create mode 100644 sysfs/class_infiniband_test.go diff --git a/fixtures.ttar b/fixtures.ttar index 951d909af..5144fccf4 100644 --- a/fixtures.ttar +++ b/fixtures.ttar @@ -609,6 +609,232 @@ Mode: 664 Directory: fixtures/sys/class Mode: 775 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: fixtures/sys/class/infiniband +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: fixtures/sys/class/infiniband/mlx4_0 +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/board_id +Lines: 1 +SM_1141000001000 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/fw_ver +Lines: 1 +2.31.5050 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/hca_type +Lines: 1 +MT4099 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: 
fixtures/sys/class/infiniband/mlx4_0/ports +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: fixtures/sys/class/infiniband/mlx4_0/ports/1 +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/excessive_buffer_overrun_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/link_downed +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/link_error_recovery +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/local_link_integrity_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_constraint_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_data +Lines: 1 +2221223609 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_packets +Lines: 1 +87169372 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: 
fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_remote_physical_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/port_rcv_switch_relay_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_constraint_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_data +Lines: 1 +26509113295 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_discards +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_packets +Lines: 1 +85734114 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/port_xmit_wait +Lines: 1 +3599 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/counters/symbol_error +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/phys_state +Lines: 1 +5: LinkUp +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/rate +Lines: 1 +40 Gb/sec (4X QDR) +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/1/state +Lines: 1 +4: ACTIVE +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - +Directory: fixtures/sys/class/infiniband/mlx4_0/ports/2 +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/excessive_buffer_overrun_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/link_downed +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/link_error_recovery +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/local_link_integrity_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/port_rcv_constraint_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/port_rcv_data +Lines: 1 +2460436784 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/port_rcv_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/port_rcv_packets +Lines: 1 +89332064 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/port_rcv_remote_physical_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/port_rcv_switch_relay_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/port_xmit_constraint_errors +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/port_xmit_data +Lines: 1 +26540356890 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/port_xmit_discards +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/port_xmit_packets +Lines: 1 +88622850 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/port_xmit_wait +Lines: 1 +3846 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/counters/symbol_error +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/phys_state +Lines: 1 +5: LinkUp +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/rate +Lines: 1 +40 Gb/sec (4X QDR) +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/class/infiniband/mlx4_0/ports/2/state +Lines: 1 +4: ACTIVE +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: fixtures/sys/class/net Mode: 775 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git 
a/internal/util/valueparser.go b/internal/util/valueparser.go index 4ecf5a782..ac93cb42d 100644 --- a/internal/util/valueparser.go +++ b/internal/util/valueparser.go @@ -52,6 +52,25 @@ func (vp *ValueParser) PInt64() *int64 { return &v } +// PUInt64 interprets the underlying value as a uint64 and returns a pointer to +// that value. +func (vp *ValueParser) PUInt64() *uint64 { + if vp.err != nil { + return nil + } + + // A base value of zero makes ParseUint infer the correct base using the + // string's prefix, if any. + const base = 0 + v, err := strconv.ParseUint(vp.v, base, 64) + if err != nil { + vp.err = err + return nil + } + + return &v +} + // Err returns the last error, if any, encountered by the ValueParser. func (vp *ValueParser) Err() error { return vp.err diff --git a/sysfs/class_infiniband.go b/sysfs/class_infiniband.go new file mode 100644 index 000000000..52d3c21d2 --- /dev/null +++ b/sysfs/class_infiniband.go @@ -0,0 +1,372 @@ +// Copyright 2019 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !windows + +package sysfs + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/prometheus/procfs/internal/util" +) + +const infinibandClassPath = "class/infiniband" + +// InfiniBandCounters contains counter values from files in +// /sys/class/infiniband/<Name>/ports/<Port>/counters or +// /sys/class/infiniband/<Name>/ports/<Port>/counters_ext +// for a single port of one InfiniBand device. 
type InfiniBandCounters struct {
	// Every field is a pointer and stays nil unless parseInfiniBandCounters
	// finds the corresponding file and parses its value.

	// Legacy counters, read from the counters_ext directory. The two
	// *Data64 values are stored multiplied by 4 by parseInfiniBandCounters.
	LegacyPortMulticastRcvPackets  *uint64 // counters_ext/port_multicast_rcv_packets
	LegacyPortMulticastXmitPackets *uint64 // counters_ext/port_multicast_xmit_packets
	LegacyPortRcvData64            *uint64 // counters_ext/port_rcv_data_64
	LegacyPortRcvPackets64         *uint64 // counters_ext/port_rcv_packets_64
	LegacyPortUnicastRcvPackets    *uint64 // counters_ext/port_unicast_rcv_packets
	LegacyPortUnicastXmitPackets   *uint64 // counters_ext/port_unicast_xmit_packets
	LegacyPortXmitData64           *uint64 // counters_ext/port_xmit_data_64
	LegacyPortXmitPackets64        *uint64 // counters_ext/port_xmit_packets_64

	// Counters read from the counters directory. PortRcvData and
	// PortXmitData are stored multiplied by 4 by parseInfiniBandCounters.
	LinkDowned               *uint64 // counters/link_downed
	LinkErrorRecovery        *uint64 // counters/link_error_recovery
	MulticastRcvPackets      *uint64 // counters/multicast_rcv_packets
	MulticastXmitPackets     *uint64 // counters/multicast_xmit_packets
	PortRcvConstraintErrors  *uint64 // counters/port_rcv_constraint_errors
	PortRcvData              *uint64 // counters/port_rcv_data
	PortRcvDiscards          *uint64 // counters/port_rcv_discards
	PortRcvErrors            *uint64 // counters/port_rcv_errors
	PortRcvPackets           *uint64 // counters/port_rcv_packets
	PortXmitConstraintErrors *uint64 // counters/port_xmit_constraint_errors
	PortXmitData             *uint64 // counters/port_xmit_data
	PortXmitDiscards         *uint64 // counters/port_xmit_discards
	PortXmitPackets          *uint64 // counters/port_xmit_packets
	PortXmitWait             *uint64 // counters/port_xmit_wait
	UnicastRcvPackets        *uint64 // counters/unicast_rcv_packets
	UnicastXmitPackets       *uint64 // counters/unicast_xmit_packets
}

// InfiniBandPort contains info from files in
// /sys/class/infiniband/<Name>/ports/<Port>
// for a single port of one InfiniBand device.
type InfiniBandPort struct {
	Name        string // name of the InfiniBand device this port belongs to
	Port        uint   // port number, taken from the port's directory name
	State       string // String representation from /sys/class/infiniband/<Name>/ports/<Port>/state
	StateID     uint   // ID from /sys/class/infiniband/<Name>/ports/<Port>/state
	PhysState   string // String representation from /sys/class/infiniband/<Name>/ports/<Port>/phys_state
	PhysStateID uint   // ID from /sys/class/infiniband/<Name>/ports/<Port>/phys_state
	Rate        uint64 // in bytes/second from /sys/class/infiniband/<Name>/ports/<Port>/rate
	Counters    InfiniBandCounters
}

// InfiniBandDevice contains info from files in /sys/class/infiniband for a
// single InfiniBand device.
type InfiniBandDevice struct {
	Name            string
	BoardID         string // /sys/class/infiniband/<Name>/board_id
	FirmwareVersion string // /sys/class/infiniband/<Name>/fw_ver
	HCAType         string // /sys/class/infiniband/<Name>/hca_type
	Ports           map[uint]InfiniBandPort // keyed by port number
}

// InfiniBandClass is a collection of every InfiniBand device in
// /sys/class/infiniband.
//
// The map keys are the names of the InfiniBand devices.
type InfiniBandClass map[string]InfiniBandDevice

// InfiniBandClass returns info for all InfiniBand devices read from
// /sys/class/infiniband.
//
// Every entry of the class directory is parsed as a device. The first device
// that fails to parse aborts the whole call and its error is returned.
func (fs FS) InfiniBandClass() (InfiniBandClass, error) {
	path := fs.sys.Path(infinibandClassPath)

	dirs, err := ioutil.ReadDir(path)
	if err != nil {
		return nil, fmt.Errorf("failed to list InfiniBand devices at %q: %v", path, err)
	}

	ibc := make(InfiniBandClass, len(dirs))
	for _, d := range dirs {
		device, err := fs.parseInfiniBandDevice(d.Name())
		if err != nil {
			return nil, err
		}

		ibc[device.Name] = *device
	}

	return ibc, nil
}

// Parse one InfiniBand device.
+func (fs FS) parseInfiniBandDevice(name string) (*InfiniBandDevice, error) { + path := fs.sys.Path(infinibandClassPath, name) + device := InfiniBandDevice{Name: name} + + for _, f := range [3]string{"board_id", "fw_ver", "hca_type"} { + name := filepath.Join(path, f) + value, err := util.SysReadFile(name) + if err != nil { + return nil, fmt.Errorf("failed to read file %q: %v", name, err) + } + + switch f { + case "board_id": + device.BoardID = value + case "fw_ver": + device.FirmwareVersion = value + case "hca_type": + device.HCAType = value + } + } + + portsPath := filepath.Join(path, "ports") + ports, err := ioutil.ReadDir(portsPath) + if err != nil { + return nil, fmt.Errorf("failed to list InfiniBand ports at %q: %v", portsPath, err) + } + + device.Ports = make(map[uint]InfiniBandPort, len(ports)) + for _, d := range ports { + port, err := fs.parseInfiniBandPort(name, d.Name()) + if err != nil { + return nil, err + } + + device.Ports[port.Port] = *port + } + + return &device, nil +} + +// Parse InfiniBand state. 
// Expected format: "<ID>: <NAME>", for example "4: ACTIVE".
//
// parseState returns the numeric ID and the name with surrounding whitespace
// removed. It returns an error if s does not contain exactly one colon or if
// the ID is not an unsigned 32-bit decimal number.
func parseState(s string) (uint, string, error) {
	parts := strings.Split(s, ":")
	if len(parts) != 2 {
		// %q keeps the trailing newline of the sysfs file out of the message.
		return 0, "", fmt.Errorf("failed to split %q into 'ID: NAME'", s)
	}

	name := strings.TrimSpace(parts[1])
	rawID := strings.TrimSpace(parts[0])
	id, err := strconv.ParseUint(rawID, 10, 32)
	if err != nil {
		return 0, name, fmt.Errorf("failed to convert %q into uint: %v", rawID, err)
	}

	return uint(id), name, nil
}

// parseRate parses a rate such as "100 Gb/sec (4X EDR)" and returns it in
// bytes/second.
//
// It returns an error if s does not contain "Gb/sec" exactly once, if the part
// before it is not a number, or if the resulting rate cannot be represented as
// a uint64 (negative, NaN or too large).
func parseRate(s string) (uint64, error) {
	parts := strings.Split(s, "Gb/sec")
	if len(parts) != 2 {
		return 0, fmt.Errorf("failed to split %q by 'Gb/sec'", s)
	}

	// The rate may be fractional (for example "2.5"), hence a float.
	rawValue := strings.TrimSpace(parts[0])
	value, err := strconv.ParseFloat(rawValue, 64)
	if err != nil {
		return 0, fmt.Errorf("failed to convert %q into float: %v", rawValue, err)
	}

	// 1 Gb/sec = 10^9 bits/sec = 125,000,000 bytes/sec.
	bytesPerSecond := value * 125000000
	// Converting a float outside the uint64 range is not well defined in Go,
	// so reject it. The negated form also catches NaN.
	if !(bytesPerSecond >= 0 && bytesPerSecond < 1<<64) {
		return 0, fmt.Errorf("rate %q is out of range", rawValue)
	}

	return uint64(bytesPerSecond), nil
}

// parseInfiniBandPort scans predefined files in /sys/class/infiniband/<Name>/ports/<Port>
// directory and gets their contents.
+func (fs FS) parseInfiniBandPort(name string, port string) (*InfiniBandPort, error) { + portNumber, err := strconv.ParseUint(port, 10, 32) + if err != nil { + return nil, fmt.Errorf("failed to convert %s into uint", port) + } + infiniBandPort := InfiniBandPort{Name: name, Port: uint(portNumber)} + + portPath := fs.sys.Path(infinibandClassPath, name, "ports", port) + content, err := ioutil.ReadFile(filepath.Join(portPath, "state")) + if err != nil { + return nil, err + } + id, name, err := parseState(string(content)) + if err != nil { + return nil, fmt.Errorf("could not parse state file in %s: %s", portPath, err) + } + infiniBandPort.State = name + infiniBandPort.StateID = id + + content, err = ioutil.ReadFile(filepath.Join(portPath, "phys_state")) + if err != nil { + return nil, err + } + id, name, err = parseState(string(content)) + if err != nil { + return nil, fmt.Errorf("could not parse phys_state file in %s: %s", portPath, err) + } + infiniBandPort.PhysState = name + infiniBandPort.PhysStateID = id + + content, err = ioutil.ReadFile(filepath.Join(portPath, "rate")) + if err != nil { + return nil, err + } + infiniBandPort.Rate, err = parseRate(string(content)) + if err != nil { + return nil, fmt.Errorf("could not parse rate file in %s: %s", portPath, err) + } + + counters, err := parseInfiniBandCounters(portPath) + if err != nil { + return nil, err + } + infiniBandPort.Counters = *counters + + return &infiniBandPort, nil +} + +func parseInfiniBandCounters(portPath string) (*InfiniBandCounters, error) { + var counters InfiniBandCounters + + path := filepath.Join(portPath, "counters") + files, err := ioutil.ReadDir(path) + if err != nil { + return nil, err + } + + for _, f := range files { + if f.IsDir() { + continue + } + + name := filepath.Join(path, f.Name()) + value, err := util.SysReadFile(name) + if err != nil { + return nil, fmt.Errorf("failed to read file %q: %v", name, err) + } + + // According to Mellanox, the metrics port_rcv_data, port_xmit_data, + 
// port_rcv_data_64, and port_xmit_data_64 "are divided by 4 unconditionally" + // as they represent the amount of data being transmitted and received per lane. + // Mellanox cards have 4 lanes per port, so all values must be multiplied by 4 + // to get the expected value. + + vp := util.NewValueParser(value) + + switch f.Name() { + case "link_downed": + counters.LinkDowned = vp.PUInt64() + case "link_error_recovery": + counters.LinkErrorRecovery = vp.PUInt64() + case "multicast_rcv_packets": + counters.MulticastRcvPackets = vp.PUInt64() + case "multicast_xmit_packets": + counters.MulticastXmitPackets = vp.PUInt64() + case "port_rcv_constraint_errors": + counters.PortRcvConstraintErrors = vp.PUInt64() + case "port_rcv_data": + counters.PortRcvData = vp.PUInt64() + *counters.PortRcvData *= 4 + case "port_rcv_discards": + counters.PortRcvDiscards = vp.PUInt64() + case "port_rcv_errors": + counters.PortRcvErrors = vp.PUInt64() + case "port_rcv_packets": + counters.PortRcvPackets = vp.PUInt64() + case "port_xmit_constraint_errors": + counters.PortXmitConstraintErrors = vp.PUInt64() + case "port_xmit_data": + counters.PortXmitData = vp.PUInt64() + *counters.PortXmitData *= 4 + case "port_xmit_discards": + counters.PortXmitDiscards = vp.PUInt64() + case "port_xmit_packets": + counters.PortXmitPackets = vp.PUInt64() + case "port_xmit_wait": + counters.PortXmitWait = vp.PUInt64() + case "unicast_rcv_packets": + counters.UnicastRcvPackets = vp.PUInt64() + case "unicast_xmit_packets": + counters.UnicastXmitPackets = vp.PUInt64() + } + + if err := vp.Err(); err != nil { + // Ugly workaround for handling https://github.com/prometheus/node_exporter/issues/966 + // when counters are `N/A (not available)`. + // This was already patched and submitted, see + // https://www.spinics.net/lists/linux-rdma/msg68596.html + // Remove this as soon as the fix lands in the enterprise distros. 
+ if strings.Contains(value, "N/A (no PMA)") { + continue + } + return nil, err + } + } + + // Parse legacy counters + path = filepath.Join(portPath, "counters_ext") + files, err = ioutil.ReadDir(path) + if err != nil && !os.IsNotExist(err) { + return nil, err + } + + for _, f := range files { + if f.IsDir() { + continue + } + + name := filepath.Join(path, f.Name()) + value, err := util.SysReadFile(name) + if err != nil { + return nil, fmt.Errorf("failed to read file %q: %v", name, err) + } + + vp := util.NewValueParser(value) + + switch f.Name() { + case "port_multicast_rcv_packets": + counters.LegacyPortMulticastRcvPackets = vp.PUInt64() + case "port_multicast_xmit_packets": + counters.LegacyPortMulticastXmitPackets = vp.PUInt64() + case "port_rcv_data_64": + counters.LegacyPortRcvData64 = vp.PUInt64() + *counters.LegacyPortRcvData64 *= 4 + case "port_rcv_packets_64": + counters.LegacyPortRcvPackets64 = vp.PUInt64() + case "port_unicast_rcv_packets": + counters.LegacyPortUnicastRcvPackets = vp.PUInt64() + case "port_unicast_xmit_packets": + counters.LegacyPortUnicastXmitPackets = vp.PUInt64() + case "port_xmit_data_64": + counters.LegacyPortXmitData64 = vp.PUInt64() + *counters.LegacyPortXmitData64 *= 4 + case "port_xmit_packets_64": + counters.LegacyPortXmitPackets64 = vp.PUInt64() + } + + if err := vp.Err(); err != nil { + // Ugly workaround for handling https://github.com/prometheus/node_exporter/issues/966 + // when counters are `N/A (not available)`. + // This was already patched and submitted, see + // https://www.spinics.net/lists/linux-rdma/msg68596.html + // Remove this as soon as the fix lands in the enterprise distros. 
+ if strings.Contains(value, "N/A (no PMA)") { + continue + } + return nil, err + } + } + + return &counters, nil +} diff --git a/sysfs/class_infiniband_test.go b/sysfs/class_infiniband_test.go new file mode 100644 index 000000000..5b6a64f5a --- /dev/null +++ b/sysfs/class_infiniband_test.go @@ -0,0 +1,139 @@ +// Copyright 2019 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !windows + +package sysfs + +import ( + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestParseSlowRate(t *testing.T) { + rate, err := parseRate("2.5 Gb/sec (1X SDR)") + if err != nil { + t.Fatal(err) + } + if rate != 312500000 { + t.Errorf("Result for InfiniBand rate not correct: want %v, have %v", 312500000, rate) + } +} + +func TestParseRate(t *testing.T) { + rate, err := parseRate("500 Gb/sec (4X HDR)") + if err != nil { + t.Fatal(err) + } + if rate != 62500000000 { + t.Errorf("Result for InfiniBand rate not correct: want %v, have %v", 62500000000, rate) + } +} + +func TestInfiniBandClass(t *testing.T) { + fs, err := NewFS(sysTestFixtures) + if err != nil { + t.Fatal(err) + } + + got, err := fs.InfiniBandClass() + if err != nil { + t.Fatal(err) + } + + var ( + port1LinkDowned uint64 = 0 + port1LinkErrorRecovery uint64 = 0 + port1PortRcvConstraintErrors uint64 = 0 + port1PortRcvData uint64 = 8884894436 + port1PortRcvErrors uint64 = 0 + port1PortRcvPackets uint64 = 87169372 + port1PortXmitConstraintErrors uint64 = 0 + port1PortXmitData uint64 
= 106036453180 + port1PortXmitDiscards uint64 = 0 + port1PortXmitPackets uint64 = 85734114 + port1PortXmitWait uint64 = 3599 + + port2LinkDowned uint64 = 0 + port2LinkErrorRecovery uint64 = 0 + port2PortRcvConstraintErrors uint64 = 0 + port2PortRcvData uint64 = 9841747136 + port2PortRcvErrors uint64 = 0 + port2PortRcvPackets uint64 = 89332064 + port2PortXmitConstraintErrors uint64 = 0 + port2PortXmitData uint64 = 106161427560 + port2PortXmitDiscards uint64 = 0 + port2PortXmitPackets uint64 = 88622850 + port2PortXmitWait uint64 = 3846 + ) + + want := InfiniBandClass{ + "mlx4_0": InfiniBandDevice{ + Name: "mlx4_0", + BoardID: "SM_1141000001000", + FirmwareVersion: "2.31.5050", + HCAType: "MT4099", + Ports: map[uint]InfiniBandPort{ + 1: { + Name: "mlx4_0", + Port: 1, + State: "ACTIVE", + StateID: 4, + PhysState: "LinkUp", + PhysStateID: 5, + Rate: 5000000000, + Counters: InfiniBandCounters{ + LinkDowned: &port1LinkDowned, + LinkErrorRecovery: &port1LinkErrorRecovery, + PortRcvConstraintErrors: &port1PortRcvConstraintErrors, + PortRcvData: &port1PortRcvData, + PortRcvErrors: &port1PortRcvErrors, + PortRcvPackets: &port1PortRcvPackets, + PortXmitConstraintErrors: &port1PortXmitConstraintErrors, + PortXmitData: &port1PortXmitData, + PortXmitDiscards: &port1PortXmitDiscards, + PortXmitPackets: &port1PortXmitPackets, + PortXmitWait: &port1PortXmitWait, + }, + }, + 2: { + Name: "mlx4_0", + Port: 2, + State: "ACTIVE", + StateID: 4, + PhysState: "LinkUp", + PhysStateID: 5, + Rate: 5000000000, + Counters: InfiniBandCounters{ + LinkDowned: &port2LinkDowned, + LinkErrorRecovery: &port2LinkErrorRecovery, + PortRcvConstraintErrors: &port2PortRcvConstraintErrors, + PortRcvData: &port2PortRcvData, + PortRcvErrors: &port2PortRcvErrors, + PortRcvPackets: &port2PortRcvPackets, + PortXmitConstraintErrors: &port2PortXmitConstraintErrors, + PortXmitData: &port2PortXmitData, + PortXmitDiscards: &port2PortXmitDiscards, + PortXmitPackets: &port2PortXmitPackets, + PortXmitWait: 
&port2PortXmitWait, + }, + }, + }, + }, + } + + if diff := cmp.Diff(want, got); diff != "" { + t.Fatalf("unexpected InfiniBand class (-want +got):\n%s", diff) + } +}