Skip to content

Commit

Permalink
Fix timer metric bucket (#3136)
Browse files Browse the repository at this point in the history
- Deprecated DefaultHistogramBoundaries in PrometheusConfig
- Wired up buckets for tally timer metrics
- Fix otel timer metrics and emit values in milliseconds
  • Loading branch information
yycptt authored and alexshtin committed Jul 29, 2022
1 parent 84149d3 commit b48cf2a
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 106 deletions.
127 changes: 55 additions & 72 deletions common/metrics/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"github.com/uber-go/tally/v4"
"github.com/uber-go/tally/v4/m3"
"github.com/uber-go/tally/v4/prometheus"
"golang.org/x/exp/maps"

"go.temporal.io/server/common/log"
"go.temporal.io/server/common/log/tag"
Expand Down Expand Up @@ -69,9 +70,6 @@ type (
// - "dimensionless"
// - "milliseconds"
// - "bytes"
// - see defs.go:L62
//
// Tally implementation uses default buckets for timer/duration metrics.
PerUnitHistogramBoundaries map[string][]float64 `yaml:"perUnitHistogramBoundaries"`
}

Expand Down Expand Up @@ -104,9 +102,6 @@ type (
Framework string `yaml:"framework"`
// Address for prometheus to serve metrics from.
ListenAddress string `yaml:"listenAddress"`
// DefaultHistogramBoundaries defines the default histogram bucket
// boundaries.
DefaultHistogramBoundaries []float64 `yaml:"defaultHistogramBoundaries"`

// HandlerPath if specified will be used instead of using the default
// HTTP handler path "/metrics".
Expand All @@ -119,9 +114,15 @@ type (
ListenNetwork string `yaml:"listenNetwork"`

// Deprecated. TimerType is the default Prometheus type to use for Tally timers.
// TimerType is always histogram.
TimerType string `yaml:"timerType"`

// Deprecated. DefaultHistogramBuckets if specified will set the default histogram
// Deprecated. Please use PerUnitHistogramBoundaries in ClientConfig.
// DefaultHistogramBoundaries defines the default histogram bucket boundaries.
DefaultHistogramBoundaries []float64 `yaml:"defaultHistogramBoundaries"`

// Deprecated. Please use PerUnitHistogramBoundaries in ClientConfig.
// DefaultHistogramBuckets if specified will set the default histogram
// buckets to be used by the reporter.
DefaultHistogramBuckets []HistogramObjective `yaml:"defaultHistogramBuckets"`

Expand Down Expand Up @@ -149,17 +150,21 @@ type SummaryObjective struct {
AllowedError float64 `yaml:"allowedError"`
}

// Supported framework types
const (
ms = float64(time.Millisecond) / float64(time.Second)

// Supported framework types

// FrameworkTally tally framework id
FrameworkTally = "tally"
// FrameworkOpentelemetry OpenTelemetry framework id
FrameworkOpentelemetry = "opentelemetry"
)

// Valid unit name for PerUnitHistogramBoundaries config field
const (
UnitNameDimensionless = "dimensionless"
UnitNameMilliseconds = "milliseconds"
UnitNameBytes = "bytes"
)

// tally sanitizer options that satisfy both Prometheus and M3 restrictions.
// This will rename metrics at the tally emission level, so metrics name we
// use maybe different from what gets emitted. In the current implementation
Expand Down Expand Up @@ -199,34 +204,34 @@ var (
100,
200,
500,
1000,
2000,
5000,
10000,
20000,
50000,
100000,
1_000,
2_000,
5_000,
10_000,
20_000,
50_000,
100_000,
},
Milliseconds: {
1 * ms,
2 * ms,
5 * ms,
10 * ms,
20 * ms,
50 * ms,
100 * ms,
200 * ms,
500 * ms,
1000 * ms,
2000 * ms,
5000 * ms,
10000 * ms,
20000 * ms,
50000 * ms,
100000 * ms,
200000 * ms,
500000 * ms,
1000000 * ms,
1,
2,
5,
10,
20,
50,
100,
200,
500,
1_000, // 1s
2_000,
5_000,
10_000, // 10s
20_000,
50_000,
100_000, // 100s = 1m40s
200_000,
500_000,
1_000_000, // 1000s = 16m40s
},
Bytes: {
1024,
Expand Down Expand Up @@ -265,21 +270,6 @@ func NewScope(logger log.Logger, c *Config) tally.Scope {
return tally.NoopScope
}

func buildHistogramBuckets(
config *PrometheusConfig,
) []prometheus.HistogramObjective {
var result []prometheus.HistogramObjective
if len(config.DefaultHistogramBuckets) > 0 {
result = make([]prometheus.HistogramObjective, len(config.DefaultHistogramBuckets))
for i, item := range config.DefaultHistogramBuckets {
result[i].Upper = item.Upper
}
} else if len(config.DefaultHistogramBoundaries) > 0 {
result = histogramBoundariesToHistogramObjectives(config.DefaultHistogramBoundaries)
}
return result
}

func convertPrometheusConfigToTally(
config *PrometheusConfig,
) *prometheus.Configuration {
Expand All @@ -294,21 +284,27 @@ func convertPrometheusConfigToTally(
ListenNetwork: config.ListenNetwork,
ListenAddress: config.ListenAddress,
TimerType: "histogram",
DefaultHistogramBuckets: buildHistogramBuckets(config),
DefaultSummaryObjectives: defaultObjectives,
OnError: config.OnError,
}
}

func setDefaultPerUnitHistogramBoundaries(clientConfig *ClientConfig) {
if clientConfig.PerUnitHistogramBoundaries == nil {
clientConfig.PerUnitHistogramBoundaries = make(map[string][]float64)
buckets := maps.Clone(defaultPerUnitHistogramBoundaries)

// In config, when overwrite default buckets, we use [dimensionless / miliseconds / bytes] as keys.
// But in code, we use [1 / ms / By] as key (to align with otel unit definition). So we do conversion here.
if bucket, ok := clientConfig.PerUnitHistogramBoundaries[UnitNameDimensionless]; ok {
buckets[Dimensionless] = bucket
}
for unit, bucket := range defaultPerUnitHistogramBoundaries {
if _, ok := clientConfig.PerUnitHistogramBoundaries[unit]; !ok {
clientConfig.PerUnitHistogramBoundaries[unit] = bucket
}
if bucket, ok := clientConfig.PerUnitHistogramBoundaries[UnitNameMilliseconds]; ok {
buckets[Milliseconds] = bucket
}
if bucket, ok := clientConfig.PerUnitHistogramBoundaries[UnitNameBytes]; ok {
buckets[Bytes] = bucket
}

clientConfig.PerUnitHistogramBoundaries = buckets
}

// newStatsdScope returns a new statsd scope with
Expand Down Expand Up @@ -366,19 +362,6 @@ func newPrometheusScope(
return scope
}

func histogramBoundariesToHistogramObjectives(boundaries []float64) []prometheus.HistogramObjective {
var result []prometheus.HistogramObjective
for _, value := range boundaries {
result = append(
result,
prometheus.HistogramObjective{
Upper: value,
},
)
}
return result
}

// MetricsHandlerFromConfig is used at startup to construct a MetricsHandler
func MetricsHandlerFromConfig(logger log.Logger, c *Config) MetricsHandler {
if c == nil {
Expand Down
13 changes: 6 additions & 7 deletions common/metrics/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,21 +127,20 @@ func (s *MetricsSuite) TestSetDefaultPerUnitHistogramBoundaries() {

customizedBoundaries := map[string][]float64{
Dimensionless: {1},
"notDefine": {1},
Milliseconds: defaultPerUnitHistogramBoundaries[Milliseconds],
Bytes: defaultPerUnitHistogramBoundaries[Bytes],
}
testCases := []histogramTest{
{
nil,
defaultPerUnitHistogramBoundaries,
input: nil,
expectResult: defaultPerUnitHistogramBoundaries,
},
{
map[string][]float64{
Dimensionless: {1},
"notDefine": {1},
input: map[string][]float64{
UnitNameDimensionless: {1},
"notDefine": {1},
},
customizedBoundaries,
expectResult: customizedBoundaries,
},
}

Expand Down
2 changes: 1 addition & 1 deletion common/metrics/otel_metric_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func (omp *otelMetricsHandler) Timer(timer string) TimerMetric {
}

return TimerMetricFunc(func(i time.Duration, t ...Tag) {
c.Record(context.Background(), i.Nanoseconds(), tagsToAttributes(omp.tags, t, omp.excludeTags)...)
c.Record(context.Background(), i.Milliseconds(), tagsToAttributes(omp.tags, t, omp.excludeTags)...)
})
}

Expand Down
6 changes: 3 additions & 3 deletions common/metrics/otel_metric_provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,14 @@ func TestMeter(t *testing.T) {
},
{
InstrumentName: "latency",
Sum: number.NewInt64Number(int64(2503 * time.Millisecond)),
Sum: number.NewInt64Number(6503),
Count: 2,
Attributes: nil,
InstrumentationLibrary: lib,
AggregationKind: aggregation.HistogramKind,
NumberKind: number.Int64Kind,
Histogram: aggregation.Buckets{
Counts: []uint64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2},
Counts: []uint64{1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
},
},
{
Expand Down Expand Up @@ -157,7 +157,7 @@ func recordMetrics(mp MetricsHandler) {
c.Record(8)
g.Record(-100, StringTag("location", "Mare Imbrium"))
d.Record(1248 * time.Millisecond)
d.Record(1255 * time.Millisecond)
d.Record(5255 * time.Millisecond)
h.Record(1234567)
t.Record(11, TaskQueueTag("__sticky__"))
e.Record(14, TaskQueueTag("filtered"))
Expand Down
17 changes: 11 additions & 6 deletions common/metrics/tally_metric_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,17 @@ var _ MetricsHandler = (*tallyMetricsHandler)(nil)
func NewTallyMetricsHandler(cfg ClientConfig, scope tally.Scope) *tallyMetricsHandler {
perUnitBuckets := make(map[MetricUnit]tally.Buckets)

if cfg.PerUnitHistogramBoundaries == nil {
setDefaultPerUnitHistogramBoundaries(&cfg)
}

for unit, boundariesList := range cfg.PerUnitHistogramBoundaries {
perUnitBuckets[MetricUnit(unit)] = tally.ValueBuckets(boundariesList)
if unit != Milliseconds {
perUnitBuckets[MetricUnit(unit)] = tally.ValueBuckets(boundariesList)
continue
}

durations := make([]time.Duration, 0, len(boundariesList))
for _, duration := range boundariesList {
durations = append(durations, time.Duration(duration)*time.Millisecond)
}
perUnitBuckets[MetricUnit(unit)] = tally.DurationBuckets(durations)
}

return &tallyMetricsHandler{
Expand Down Expand Up @@ -90,7 +95,7 @@ func (tmp *tallyMetricsHandler) Gauge(gauge string) GaugeMetric {
// Timer obtains a timer for the given name and MetricOptions.
func (tmp *tallyMetricsHandler) Timer(timer string) TimerMetric {
return TimerMetricFunc(func(d time.Duration, tag ...Tag) {
tmp.scope.Tagged(tagsToMap(tmp.tags, tag, tmp.excludeTags)).Timer(timer).Record(d)
tmp.scope.Tagged(tagsToMap(tmp.tags, tag, tmp.excludeTags)).Histogram(timer, tmp.perUnitBuckets[Milliseconds]).RecordDuration(d)
})
}

Expand Down
27 changes: 21 additions & 6 deletions common/metrics/tally_metric_provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,24 @@ import (
"github.com/uber-go/tally/v4"
)

var defaultConfig = ClientConfig{
Tags: nil,
ExcludeTags: map[string][]string{
"taskqueue": {"__sticky__"},
"activityType": {},
"workflowType": {},
},
Prefix: "",
PerUnitHistogramBoundaries: map[string][]float64{Dimensionless: {0, 10, 100}, Bytes: {1024, 2048}, Milliseconds: {10, 500, 1000, 5000, 10000}},
}

func TestTallyScope(t *testing.T) {
scope := tally.NewTestScope("test", map[string]string{})
mp := NewTallyMetricsHandler(defaultConfig, scope)
recordTallyMetrics(mp)

snap := scope.Snapshot()
counters, gauges, timers, histograms := snap.Counters(), snap.Gauges(), snap.Timers(), snap.Histograms()
counters, gauges, timers, histograms := snap.Counters(), snap.Gauges(), snap.Histograms(), snap.Histograms()

assert.EqualValues(t, 8, counters["test.hits+"].Value())
assert.EqualValues(t, map[string]string{}, counters["test.hits+"].Tags())
Expand All @@ -55,10 +66,14 @@ func TestTallyScope(t *testing.T) {
"location": "Mare Imbrium",
}, gauges["test.temp+location=Mare Imbrium"].Tags())

assert.EqualValues(t, []time.Duration{
1248 * time.Millisecond,
1255 * time.Millisecond,
}, timers["test.latency+"].Values())
assert.EqualValues(t, map[time.Duration]int64{
10 * time.Millisecond: 0,
500 * time.Millisecond: 0,
1000 * time.Millisecond: 0,
5000 * time.Millisecond: 1,
10000 * time.Millisecond: 1,
math.MaxInt64: 0,
}, timers["test.latency+"].Durations())
assert.EqualValues(t, map[string]string{}, timers["test.latency+"].Tags())

assert.EqualValues(t, map[float64]int64{
Expand All @@ -81,7 +96,7 @@ func recordTallyMetrics(mp MetricsHandler) {
c.Record(8)
g.Record(-100, StringTag("location", "Mare Imbrium"))
d.Record(1248 * time.Millisecond)
d.Record(1255 * time.Millisecond)
d.Record(5255 * time.Millisecond)
h.Record(1234567)
t.Record(11, TaskQueueTag("__sticky__"))
e.Record(14, TaskQueueTag("filtered"))
Expand Down
11 changes: 0 additions & 11 deletions common/metrics/userscope_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,6 @@ import (
"time"
)

var defaultConfig = ClientConfig{
Tags: nil,
ExcludeTags: map[string][]string{
"taskqueue": {"__sticky__"},
"activityType": {},
"workflowType": {},
},
Prefix: "",
PerUnitHistogramBoundaries: map[string][]float64{Dimensionless: {0, 10, 100}, Bytes: {1024, 2048}},
}

func BenchmarkAllTheMetricsUserScope(b *testing.B) {
var emp MetricsHandler = NoopMetricsHandler.WithTags(OperationTag("everything-is-awesome-3"))
var us UserScope = newUserScope(emp, defaultConfig.Tags)
Expand Down

0 comments on commit b48cf2a

Please sign in to comment.