Skip to content

Commit

Permalink
DAOS-7203 control: Add histogram support to Prometheus exporter (#5382)
Browse files Browse the repository at this point in the history
Update the Prometheus exporter to support passthrough histograms
from the native DAOS telemetry format. Also fix a few bugs and
inefficiencies in the native histogram implementation.

Signed-off-by: Michael MacDonald <[email protected]>
  • Loading branch information
mjmac authored Nov 9, 2024
1 parent 6a4364e commit 248bb75
Show file tree
Hide file tree
Showing 21 changed files with 962 additions and 133 deletions.
10 changes: 4 additions & 6 deletions src/control/cmd/dmg/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,13 +254,11 @@ func (cmd *telemConfigCmd) configurePrometheus() (*installInfo, error) {
}

sc := &staticConfig{}
for _, h := range cmd.config.HostList {
host, _, err := common.SplitPort(h, 0)
if err != nil {
return nil, err
}
sc.Targets = append(sc.Targets, host+":9191")
sc.Targets, err = common.ParseHostList(cmd.config.HostList, 9191)
if err != nil {
return nil, err
}

cfg.ScrapeConfigs = []*scrapeConfig{
{
JobName: "daos",
Expand Down
4 changes: 4 additions & 0 deletions src/control/lib/control/telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,10 @@ func TestControl_Metric_JSON(t *testing.T) {
CumulativeCount: 55,
UpperBound: 500,
},
{
CumulativeCount: 4242,
UpperBound: math.Inf(1),
},
},
},
},
Expand Down
66 changes: 66 additions & 0 deletions src/control/lib/daos/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package daos

import (
"encoding/json"
"math"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -222,6 +223,71 @@ func (ms *MetricSet) MarshalJSON() ([]byte, error) {
})
}

// jsonFloat is a float64 that can round-trip through JSON even when it holds
// +Inf, -Inf or NaN. It is a workaround for the standard library's inability
// to deal with those values (https://github.com/golang/go/issues/59627):
// they are written as the quoted strings "+Inf", "-Inf" and "NaN" instead of
// as JSON numbers.
type jsonFloat float64

// MarshalJSON implements json.Marshaler. Non-finite values are encoded as
// quoted strings; everything else is encoded as an ordinary JSON number.
func (jf jsonFloat) MarshalJSON() ([]byte, error) {
	f := float64(jf)

	if math.IsNaN(f) {
		return []byte(`"NaN"`), nil
	}
	if math.IsInf(f, 1) {
		return []byte(`"+Inf"`), nil
	}
	if math.IsInf(f, -1) {
		return []byte(`"-Inf"`), nil
	}

	return json.Marshal(f)
}

// UnmarshalJSON implements json.Unmarshaler. The input is first decoded as a
// plain float64; if that fails it is decoded as a JSON string whose contents
// are then parsed with strconv.ParseFloat, which is how the quoted spellings
// produced by MarshalJSON are read back.
func (jf *jsonFloat) UnmarshalJSON(data []byte) error {
	// Decode straight into the receiver so that a successful numeric decode
	// needs no further work.
	if json.Unmarshal(data, (*float64)(jf)) == nil {
		return nil
	}

	// Not a number: fall back to a quoted representation.
	var quoted string
	err := json.Unmarshal(data, &quoted)
	if err != nil {
		return err
	}

	parsed, err := strconv.ParseFloat(quoted, 64)
	if err != nil {
		return err
	}
	*jf = jsonFloat(parsed)

	return nil
}

// MarshalJSON implements json.Marshaler for MetricBucket. The bucket is
// encoded with its UpperBound routed through jsonFloat, so that the upper
// bound is emitted using jsonFloat's encoding rules.
func (mb *MetricBucket) MarshalJSON() ([]byte, error) {
	// A locally defined type with the same underlying struct carries none of
	// MetricBucket's methods, so json.Marshal does not call back into this
	// method.
	type bucketAlias MetricBucket

	wire := struct {
		*bucketAlias
		// Overrides the "upper_bound" key of the wire representation.
		UpperBound jsonFloat `json:"upper_bound"`
	}{
		bucketAlias: (*bucketAlias)(mb),
		UpperBound:  jsonFloat(mb.UpperBound),
	}

	return json.Marshal(&wire)
}

// UnmarshalJSON implements json.Unmarshaler for MetricBucket. The
// "upper_bound" key is decoded through jsonFloat and then copied into
// mb.UpperBound; the remaining keys are decoded directly into mb.
func (mb *MetricBucket) UnmarshalJSON(data []byte) error {
	// A locally defined type with the same underlying struct carries none of
	// MetricBucket's methods, so json.Unmarshal does not call back into this
	// method.
	type bucketAlias MetricBucket

	var wire struct {
		UpperBound jsonFloat `json:"upper_bound"`
		*bucketAlias
	}
	// Point the embedded alias at the receiver so decoded fields land in mb.
	wire.bucketAlias = (*bucketAlias)(mb)

	err := json.Unmarshal(data, &wire)
	if err != nil {
		return err
	}

	// The bound is assigned unconditionally from the decoded wire value.
	mb.UpperBound = float64(wire.UpperBound)

	return nil
}

// jsonMetric serves as a universal metric representation for unmarshaling from
// JSON. It covers all possible fields of Metric types.
type jsonMetric struct {
Expand Down
66 changes: 66 additions & 0 deletions src/control/lib/daos/telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package daos

import (
"encoding/json"
"math"
"testing"
"time"

Expand Down Expand Up @@ -244,3 +245,68 @@ func TestDaos_MetricSet_JSON(t *testing.T) {
})
}
}

// TestDaos_MetricBucket_JSON checks that a MetricBucket's UpperBound survives
// a JSON Marshal/Unmarshal round trip, covering +Inf, -Inf, NaN and ordinary
// finite values.
func TestDaos_MetricBucket_JSON(t *testing.T) {
	type testCase struct {
		bucket          *MetricBucket
		expUpperBound   float64
		expMarshalErr   error
		expUnmarshalErr error
	}

	cases := map[string]testCase{
		"+Inf": {
			bucket:        &MetricBucket{UpperBound: math.Inf(1)},
			expUpperBound: math.Inf(1),
		},
		"-Inf": {
			bucket:        &MetricBucket{UpperBound: math.Inf(-1)},
			expUpperBound: math.Inf(-1),
		},
		"NaN": {
			bucket:        &MetricBucket{UpperBound: math.NaN()},
			expUpperBound: math.NaN(),
		},
		"42.42": {
			bucket:        &MetricBucket{UpperBound: 42.42},
			expUpperBound: 42.42,
		},
		"0.000": {
			bucket:        &MetricBucket{UpperBound: 0.000},
			expUpperBound: 0.000,
		},
	}

	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			encoded, marshalErr := json.Marshal(tc.bucket)
			test.CmpErr(t, tc.expMarshalErr, marshalErr)
			if tc.expMarshalErr != nil {
				return
			}

			decoded := MetricBucket{}
			unmarshalErr := json.Unmarshal(encoded, &decoded)
			test.CmpErr(t, tc.expUnmarshalErr, unmarshalErr)
			if tc.expUnmarshalErr != nil {
				return
			}

			// NaN never compares equal to itself, so it is checked with
			// math.IsNaN rather than through the generic comparison below.
			if math.IsNaN(tc.expUpperBound) {
				if !math.IsNaN(decoded.UpperBound) {
					t.Fatalf("UpperBound NaN value did not survive Marshal/Unmarshal (got %f)", decoded.UpperBound)
				}
				return
			}

			if diff := cmp.Diff(tc.expUpperBound, decoded.UpperBound); diff != "" {
				t.Fatalf("Bucket UpperBound value did not survive Marshal/Unmarshal (-want, +got): %s", diff)
			}
		})
	}
}
3 changes: 3 additions & 0 deletions src/control/lib/telemetry/counter.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ import (
"fmt"
)

// Compile-time assertion that *Counter implements the Metric interface.
var _ Metric = (*Counter)(nil)

// Counter is a counter metric. It embeds metricBase.
type Counter struct {
metricBase
}
Expand Down
4 changes: 4 additions & 0 deletions src/control/lib/telemetry/duration.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@ import (
"time"
)

// Compile-time assertion that *Duration implements the StatsMetric interface.
var _ StatsMetric = (*Duration)(nil)

// Duration is a metric that embeds statsMetric and may additionally carry
// histogram data.
type Duration struct {
statsMetric
hist *Histogram // optional histogram data
}

func (d *Duration) Type() MetricType {
Expand Down Expand Up @@ -67,6 +70,7 @@ func newDuration(hdl *handle, path string, name *string, node *C.struct_d_tm_nod
},
},
}
d.hist = newHistogram(&d.statsMetric)

// Load up statistics
_ = d.Value()
Expand Down
6 changes: 6 additions & 0 deletions src/control/lib/telemetry/gauge.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ import (
"fmt"
)

// Compile-time assertions that *Gauge implements Metric and that *StatsGauge
// implements StatsMetric.
var _ Metric = (*Gauge)(nil)
var _ StatsMetric = (*StatsGauge)(nil)

// Gauge is a metric that consists of a single value that may increase or decrease.
type Gauge struct {
metricBase
Expand Down Expand Up @@ -93,6 +96,7 @@ func GetGauge(ctx context.Context, name string) (*Gauge, error) {
// StatsGauge is a gauge with statistics gathered. It embeds statsMetric and
// may additionally carry histogram data.
type StatsGauge struct {
statsMetric
hist *Histogram // optional histogram data
}

// Type returns the type of the gauge with stats.
Expand Down Expand Up @@ -136,9 +140,11 @@ func newStatsGauge(hdl *handle, path string, name *string, node *C.struct_d_tm_n
},
},
}
g.hist = newHistogram(&g.statsMetric)

// Load up the stats
_ = g.Value()

return g
}

Expand Down
Loading

0 comments on commit 248bb75

Please sign in to comment.