diff --git a/README.md b/README.md index aeb83f9..69ee86d 100644 --- a/README.md +++ b/README.md @@ -114,3 +114,11 @@ If an interface is marked as the WAN interface, these metrics are populated. - `wan_tx_bytes`: Bytes transmitted since last reset - `wan_rx_rate`: Bytes received rate (momentarily) - `wan_tx_rate`: Bytes transmitted rate (momentarily) + +### Ping Metrics + +- `ping_loss_ratio`: Packet loss ratio (range 0-1, with 0.33 meaning 33% packet loss) +- `ping_rtt_best_seconds`: Best round trip time, in seconds +- `ping_rtt_mean_seconds`: Mean round trip time, in seconds +- `ping_rtt_worst_seconds`: Worst round trip time, in seconds +- `ping_rtt_std_deviation_seconds`: Standard deviation for round trip time, in seconds diff --git a/exporter/device.go b/exporter/device.go index 76cb6e5..e8e5ba4 100644 --- a/exporter/device.go +++ b/exporter/device.go @@ -10,8 +10,13 @@ import ( var defaultWithInterfaces = true -func (e *Exporter) fetchDeviceData() ([]*models.DeviceStatusOverview, error) { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) +type Device struct { + Statistics *models.DeviceStatistics + *models.DeviceStatusOverview +} + +func (e *Exporter) fetchDeviceData() ([]Device, error) { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() params := &devices.GetDevicesParams{ @@ -23,13 +28,46 @@ func (e *Exporter) fetchDeviceData() ([]*models.DeviceStatusOverview, error) { return nil, err } - data := make([]*models.DeviceStatusOverview, 0, len(devicesResponse.Payload)) + data := make([]Device, 0, len(devicesResponse.Payload)) for _, overview := range devicesResponse.Payload { if overview.Identification == nil { continue } - data = append(data, overview) + dev := Device{nil, overview} + + if id := derefOrEmpty(overview.Identification.ID); id != "" { + params := &devices.GetDevicesIDStatisticsParams{ + ID: id, + Interval: "hour", // smallest interval possible + Context: ctx, + } + statisticsResponse, err := e.api.Devices.GetDevicesIDStatistics(params) + if err != nil { + return nil, err + } + dev.Statistics = statisticsResponse.Payload + } + data = append(data, dev) } return data, nil } + +func (dev *Device) PingMetrics() *PingMetrics { + if dev.Statistics == nil || len(dev.Statistics.Ping) == 0 { + return nil + } + + m := NewHistory(len(dev.Statistics.Ping)) + for _, xy := range dev.Statistics.Ping { + if xy == nil { + m.Add(0, true) + continue + } + + rtt := time.Duration(xy.Y * float64(time.Millisecond)) + m.Add(rtt, false) + } + + return m.Compute() +} diff --git a/exporter/device_test.go b/exporter/device_test.go new file mode 100644 index 0000000..76f2892 --- /dev/null +++ b/exporter/device_test.go @@ -0,0 +1,91 @@ +package exporter + +import ( + "testing" + "time" + + "github.com/ffddorf/unms-exporter/models" +) + +const ( + ms = time.Millisecond + µs = time.Microsecond //nolint:asciicheck +) + +type metricExpectation map[string]struct { + actual interface{} + satisfied bool +} + +func comparePingMetrics(t *testing.T, expectations metricExpectation, actual *PingMetrics) { + t.Helper() + + anyFailure := false + for field, expectation := range expectations { + if !expectation.satisfied { + anyFailure = true + t.Errorf("unexpected value for field %q: %v", field, expectation.actual) + } + } + if anyFailure { + t.FailNow() + } +} + +func TestDevice_PingMetrics_connected(t *testing.T) { + t.Parallel() + + subject := Device{ + Statistics: &models.DeviceStatistics{ + Ping: models.ListOfCoordinates{{Y: 5}, {Y: 10}, {Y: 25}, {Y: 15}, {Y: 1}}, // x values are ignored + }, + } + + actual := subject.PingMetrics() + if actual == nil { + t.Error("expected PingMetrics() to return somthing, got nil") + } + + comparePingMetrics(t, metricExpectation{ + "packets sent": {actual.PacketsSent, actual.PacketsSent == 5}, + "packets lost": {actual.PacketsLost, actual.PacketsLost == 0}, + "rtt best": {actual.Best, actual.Best == 1*ms}, + "rtt worst": {actual.Worst, actual.Worst == 25*ms}, + "rtt median": {actual.Median, actual.Median == 10*ms}, + "rtt meain": {actual.Mean, actual.Mean == 11200*µs}, // 11.2ms + "rtt std dev": {actual.StdDev, 8350*µs < actual.StdDev && actual.StdDev < 8360*µs}, // ~8.352245ms + }, actual) +} + +func TestDevice_PingMetrics_missingPackets(t *testing.T) { + t.Parallel() + + subject := Device{ + Statistics: &models.DeviceStatistics{ + Ping: models.ListOfCoordinates{nil, {Y: 100}, {Y: 250}, nil, {Y: 120}}, + }, + } + + actual := subject.PingMetrics() + if actual == nil { + t.Error("expected PingMetrics() to return somthing, got nil") + } + + comparePingMetrics(t, metricExpectation{ + "packets sent": {actual.PacketsSent, actual.PacketsSent == 5}, + "packets lost": {actual.PacketsLost, actual.PacketsLost == 2}, + "rtt best": {actual.Best, actual.Best == 100*ms}, + "rtt worst": {actual.Worst, actual.Worst == 250*ms}, + "rtt median": {actual.Median, actual.Median == 120*ms}, + "rtt meain": {actual.Mean, 156666*µs < actual.Mean && actual.Mean < 156667*µs}, // 156.66666ms + "rtt std dev": {actual.StdDev, 66499*µs < actual.StdDev && actual.StdDev < 66500*µs}, // ~66.499791ms + }, actual) +} + +func TestDevice_PingMetrics_disconnected(t *testing.T) { + t.Parallel() + + if actual := (&Device{}).PingMetrics(); actual != nil { + t.Errorf("expected PingMetrics() to return nil, got %+v", actual) + } +} diff --git a/exporter/exporter.go b/exporter/exporter.go index 07f488b..032e128 100644 --- a/exporter/exporter.go +++ b/exporter/exporter.go @@ -66,6 +66,12 @@ var metricSpecs = map[string]metricSpec{ "wan_tx_bytes": newSpec("Bytes sent on WAN interface", nil), "wan_rx_rate": newSpec("Receive rate on WAN interface", nil), "wan_tx_rate": newSpec("Transmit rate on WAN interface", nil), + + "ping_loss_ratio": newSpec("Ping packet loss ratio", nil), + "ping_rtt_best_seconds": newSpec("Best ping round trip time in seconds", nil), + "ping_rtt_mean_seconds": newSpec("Mean ping round trip time in seconds", nil), + "ping_rtt_worst_seconds": newSpec("Worst ping round trip time in seconds", nil), + "ping_rtt_std_deviation_seconds": newSpec("Standard deviation for ping round trip time in seconds", nil), } type Exporter struct { @@ -232,6 +238,19 @@ func (e *Exporter) collectImpl(out chan<- prom.Metric) error { out <- e.newMetric("wan_rx_rate", prom.GaugeValue, wanIF.Statistics.Rxrate, deviceLabels...) out <- e.newMetric("wan_tx_rate", prom.GaugeValue, wanIF.Statistics.Txrate, deviceLabels...) } + + // Ping metrics + ratio := 1.0 + if ping := device.PingMetrics(); ping != nil { + if ping.PacketsSent > 0 { + ratio = float64(ping.PacketsLost) / float64(ping.PacketsSent) + } + out <- e.newMetric("ping_rtt_best_seconds", prom.GaugeValue, ping.Best.Seconds(), deviceLabels...) + out <- e.newMetric("ping_rtt_mean_seconds", prom.GaugeValue, ping.Mean.Seconds(), deviceLabels...) + out <- e.newMetric("ping_rtt_worst_seconds", prom.GaugeValue, ping.Worst.Seconds(), deviceLabels...) + out <- e.newMetric("ping_rtt_std_deviation_seconds", prom.GaugeValue, ping.StdDev.Seconds(), deviceLabels...) + } + out <- e.newMetric("ping_loss_ratio", prom.GaugeValue, ratio, deviceLabels...) } return nil diff --git a/exporter/ping_rtt.go b/exporter/ping_rtt.go new file mode 100644 index 0000000..8725dbf --- /dev/null +++ b/exporter/ping_rtt.go @@ -0,0 +1,95 @@ +package exporter + +import ( + "math" + "sort" + "time" +) + +// PingMetrics is a dumb data point computed from a list of PingResults. +type PingMetrics struct { + PacketsSent int // number of packets sent + PacketsLost int // number of packets lost + Best time.Duration // best RTT + Worst time.Duration // worst RTT + Median time.Duration // median RTT + Mean time.Duration // mean RTT + StdDev time.Duration // RTT std deviation +} + +// PingResult stores the information about a single ping, in particular +// the round-trip time or whether the packet was lost. +type PingResult struct { + RTT time.Duration + Lost bool +} + +// PingHistory represents the ping history for a single node/device. +type PingHistory []PingResult + +// NewHistory creates a new History object with a specific capacity. +func NewHistory(capacity int) PingHistory { + return make(PingHistory, 0, capacity) +} + +// AddResult saves a ping result into the internal history. +func (h *PingHistory) Add(rtt time.Duration, lost bool) { + *h = append(*h, PingResult{RTT: rtt, Lost: lost}) +} + +// Compute aggregates the result history into a single data point. +func (h PingHistory) Compute() *PingMetrics { + numFailure := 0 + numTotal := len(h) + + if numTotal == 0 { + return nil + } + + data := make([]float64, 0, numTotal) + var best, worst, mean, stddev, total, sumSquares float64 + + for _, curr := range h { + if curr.Lost { + numFailure++ + continue + } + + rtt := curr.RTT.Seconds() + if rtt < best || len(data) == 0 { + best = rtt + } + if rtt > worst || len(data) == 0 { + worst = rtt + } + data = append(data, rtt) + total += rtt + } + + size := float64(numTotal - numFailure) + mean = total / size + for _, rtt := range data { + sumSquares += math.Pow(rtt-mean, 2) + } + stddev = math.Sqrt(sumSquares / size) + + median := math.NaN() + if l := len(data); l > 0 { + sort.Float64Slice(data).Sort() + if l%2 == 0 { + median = (data[l/2-1] + data[l/2]) / 2 + } else { + median = data[l/2] + } + } + + return &PingMetrics{ + PacketsSent: numTotal, + PacketsLost: numFailure, + Best: time.Duration(best * float64(time.Second)), + Worst: time.Duration(worst * float64(time.Second)), + Median: time.Duration(median * float64(time.Second)), + Mean: time.Duration(mean * float64(time.Second)), + StdDev: time.Duration(stddev * float64(time.Second)), + } +}