Skip to content

Commit

Permalink
Collect non-numeric data from /sys/class/infiniband (prometheus#1563)
Browse files: browse the repository at this point in the history.
Let the node exporter collect the non-numeric data from
/sys/class/infiniband: board ID, firmware version, and HCA type.

Signed-off-by: Benjamin Drung <[email protected]>

Co-authored-by: Ben Kochie <[email protected]>
  • Loading branch information
2 people authored and oblitorum committed Apr 9, 2024
1 parent eec00ca commit 4dc0171
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 1 deletion.
4 changes: 4 additions & 0 deletions collector/fixtures/e2e-64k-page-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,10 @@ node_hwmon_temp_max_celsius{chip="platform_coretemp_1",sensor="temp2"} 84
node_hwmon_temp_max_celsius{chip="platform_coretemp_1",sensor="temp3"} 84
node_hwmon_temp_max_celsius{chip="platform_coretemp_1",sensor="temp4"} 84
node_hwmon_temp_max_celsius{chip="platform_coretemp_1",sensor="temp5"} 84
# HELP node_infiniband_info Non-numeric data from /sys/class/infiniband/<device>, value is always 1.
# TYPE node_infiniband_info gauge
node_infiniband_info{board_id="I40IW Board ID",device="i40iw0",firmware_version="0.2",hca_type="I40IW"} 1
node_infiniband_info{board_id="SM_1141000001000",device="mlx4_0",firmware_version="2.31.5050",hca_type="MT4099"} 1
# HELP node_infiniband_legacy_data_received_bytes_total Number of data octets received on all links
# TYPE node_infiniband_legacy_data_received_bytes_total counter
node_infiniband_legacy_data_received_bytes_total{device="mlx4_0",port="1"} 1.8527668e+07
Expand Down
4 changes: 4 additions & 0 deletions collector/fixtures/e2e-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1085,6 +1085,10 @@ node_hwmon_temp_max_celsius{chip="platform_coretemp_1",sensor="temp2"} 84
node_hwmon_temp_max_celsius{chip="platform_coretemp_1",sensor="temp3"} 84
node_hwmon_temp_max_celsius{chip="platform_coretemp_1",sensor="temp4"} 84
node_hwmon_temp_max_celsius{chip="platform_coretemp_1",sensor="temp5"} 84
# HELP node_infiniband_info Non-numeric data from /sys/class/infiniband/<device>, value is always 1.
# TYPE node_infiniband_info gauge
node_infiniband_info{board_id="I40IW Board ID",device="i40iw0",firmware_version="0.2",hca_type="I40IW"} 1
node_infiniband_info{board_id="SM_1141000001000",device="mlx4_0",firmware_version="2.31.5050",hca_type="MT4099"} 1
# HELP node_infiniband_legacy_data_received_bytes_total Number of data octets received on all links
# TYPE node_infiniband_legacy_data_received_bytes_total counter
node_infiniband_legacy_data_received_bytes_total{device="mlx4_0",port="1"} 1.8527668e+07
Expand Down
13 changes: 12 additions & 1 deletion collector/infiniband_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type infinibandCollector struct {
fs sysfs.FS
metricDescs map[string]*prometheus.Desc
logger log.Logger
subsystem string
}

func init() {
Expand Down Expand Up @@ -80,10 +81,11 @@ func NewInfiniBandCollector(logger log.Logger) (Collector, error) {
}

i.metricDescs = make(map[string]*prometheus.Desc)
i.subsystem = "infiniband"

for metricName, description := range descriptions {
i.metricDescs[metricName] = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "infiniband", metricName),
prometheus.BuildFQName(namespace, i.subsystem, metricName),
description,
[]string{"device", "port"},
nil,
Expand Down Expand Up @@ -114,6 +116,15 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error {
}

for _, device := range devices {
infoDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, c.subsystem, "info"),
"Non-numeric data from /sys/class/infiniband/<device>, value is always 1.",
[]string{"device", "board_id", "firmware_version", "hca_type"},
nil,
)
infoValue := 1.0
ch <- prometheus.MustNewConstMetric(infoDesc, prometheus.GaugeValue, infoValue, device.Name, device.BoardID, device.FirmwareVersion, device.HCAType)

for _, port := range device.Ports {
portStr := strconv.FormatUint(uint64(port.Port), 10)

Expand Down

0 comments on commit 4dc0171

Please sign in to comment.