From cddc1978872301766c949a26c3cd439ab8989e00 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Sun, 10 Dec 2023 19:30:37 +0000 Subject: [PATCH] feat(telemetry): enable `statsd` and `dogstatsd` telemetry sinks (backport #18646) (#18673) Co-authored-by: Julien Robert --- CHANGELOG.md | 1 + go.mod | 1 + go.sum | 1 + server/config/toml.go | 11 +++++ simapp/go.mod | 1 + simapp/go.sum | 1 + telemetry/metrics.go | 74 ++++++++++++++++++++++++++------ telemetry/metrics_test.go | 2 + tests/go.mod | 1 + tests/go.sum | 1 + tools/confix/data/v0.50-app.toml | 11 +++++ 11 files changed, 91 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b98ed3acc369..c17a01962782 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ ### Improvements +* (telemetry) [#18646] (https://github.com/cosmos/cosmos-sdk/pull/18646) Enable statsd and dogstatsd telemetry sinks. * (server) [#18478](https://github.com/cosmos/cosmos-sdk/pull/18478) Add command flag to disable colored logs. * (x/gov) [#18025](https://github.com/cosmos/cosmos-sdk/pull/18025) Improve ` q gov proposer` by querying directly a proposal instead of tx events. It is an alias of `q gov proposal` as the proposer is a field of the proposal. * (version) [#18063](https://github.com/cosmos/cosmos-sdk/pull/18063) Allow to define extra info to be displayed in ` version --long` command. diff --git a/go.mod b/go.mod index a6952865fd2d..6d117c5f8d12 100644 --- a/go.mod +++ b/go.mod @@ -69,6 +69,7 @@ require ( require ( filippo.io/edwards25519 v1.0.0 // indirect github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect + github.com/DataDog/datadog-go v3.2.0+incompatible // indirect github.com/DataDog/zstd v1.5.5 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/btcsuite/btcd/btcec/v2 v2.3.2 // indirect diff --git a/go.sum b/go.sum index 294cccb3a1c6..4f0786766803 100644 --- a/go.sum +++ b/go.sum @@ -62,6 +62,7 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25 github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/DataDog/datadog-go v3.2.0+incompatible h1:qSG2N4FghB1He/r2mFrWKCaL7dXCilEuNEeAn20fdD4= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ= github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= diff --git a/server/config/toml.go b/server/config/toml.go index 21ff62bda4f5..85ac3743737e 100644 --- a/server/config/toml.go +++ b/server/config/toml.go @@ -121,6 +121,17 @@ global-labels = [{{ range $k, $v := .Telemetry.GlobalLabels }} ["{{index $v 0 }}", "{{ index $v 1}}"],{{ end }} ] +# MetricsSink defines the type of metrics sink to use. +metrics-sink = "{{ .Telemetry.MetricsSink }}" + +# StatsdAddr defines the address of a statsd server to send metrics to. +# Only utilized if MetricsSink is set to "statsd" or "dogstatsd". +statsd-addr = "{{ .Telemetry.StatsdAddr }}" + +# DatadogHostname defines the hostname to use when emitting metrics to +# Datadog. Only utilized if MetricsSink is set to "dogstatsd". +datadog-hostname = "{{ .Telemetry.DatadogHostname }}" + ############################################################################### ### API Configuration ### ############################################################################### diff --git a/simapp/go.mod b/simapp/go.mod index 452eff048555..bfd3989dc268 100644 --- a/simapp/go.mod +++ b/simapp/go.mod @@ -42,6 +42,7 @@ require ( filippo.io/edwards25519 v1.0.0 // indirect github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.1 // indirect + github.com/DataDog/datadog-go v3.2.0+incompatible // indirect github.com/DataDog/zstd v1.5.5 // indirect github.com/aws/aws-sdk-go v1.44.224 // indirect github.com/beorn7/perks v1.0.1 // indirect diff --git a/simapp/go.sum b/simapp/go.sum index 196c3eed8fbd..b522018eb861 100644 --- a/simapp/go.sum +++ b/simapp/go.sum @@ -228,6 +228,7 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25 github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/DataDog/datadog-go v3.2.0+incompatible h1:qSG2N4FghB1He/r2mFrWKCaL7dXCilEuNEeAn20fdD4= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ= github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= diff --git a/telemetry/metrics.go b/telemetry/metrics.go index 690acf41d9c3..812ff0abdbd2 100644 --- a/telemetry/metrics.go +++ b/telemetry/metrics.go @@ -4,9 +4,11 @@ import ( "bytes" "encoding/json" "fmt" + "net/http" "time" "github.com/hashicorp/go-metrics" + "github.com/hashicorp/go-metrics/datadog" metricsprom "github.com/hashicorp/go-metrics/prometheus" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/expfmt" @@ -21,8 +23,17 @@ const ( FormatDefault = "" FormatPrometheus = "prometheus" FormatText = "text" + + MetricSinkInMem = "mem" + MetricSinkStatsd = "statsd" + MetricSinkDogsStatsd = "dogstatsd" ) +// DisplayableSink is an interface that defines a method for displaying metrics. +type DisplayableSink interface { + DisplayMetrics(resp http.ResponseWriter, req *http.Request) (any, error) +} + // Config defines the configuration options for application telemetry. type Config struct { // Prefixed with keys to separate services @@ -52,6 +63,17 @@ type Config struct { // Example: // [["chain_id", "cosmoshub-1"]] GlobalLabels [][]string `mapstructure:"global-labels"` + + // MetricsSink defines the type of metrics backend to use. + MetricsSink string `mapstructure:"type" default:"mem"` + + // StatsdAddr defines the address of a statsd server to send metrics to. + // Only utilized if MetricsSink is set to "statsd" or "dogstatsd". + StatsdAddr string `mapstructure:"statsd-addr"` + + // DatadogHostname defines the hostname to use when emitting metrics to + // Datadog. Only utilized if MetricsSink is set to "dogstatsd". + DatadogHostname string `mapstructure:"datadog-hostname"` } // Metrics defines a wrapper around application telemetry functionality. It allows @@ -60,7 +82,7 @@ type Config struct { // by the operator. In addition to the sinks, when a process gets a SIGUSR1, a // dump of formatted recent metrics will be sent to STDERR. type Metrics struct { - memSink *metrics.InmemSink + sink metrics.MetricSink prometheusEnabled bool } @@ -76,12 +98,11 @@ func New(cfg Config) (_ *Metrics, rerr error) { return nil, nil } - if numGlobalLables := len(cfg.GlobalLabels); numGlobalLables > 0 { - parsedGlobalLabels := make([]metrics.Label, numGlobalLables) + if numGlobalLabels := len(cfg.GlobalLabels); numGlobalLabels > 0 { + parsedGlobalLabels := make([]metrics.Label, numGlobalLabels) for i, gl := range cfg.GlobalLabels { parsedGlobalLabels[i] = NewLabel(gl[0], gl[1]) } - globalLabels = parsedGlobalLabels } @@ -89,16 +110,32 @@ func New(cfg Config) (_ *Metrics, rerr error) { metricsConf.EnableHostname = cfg.EnableHostname metricsConf.EnableHostnameLabel = cfg.EnableHostnameLabel - memSink := metrics.NewInmemSink(10*time.Second, time.Minute) - inMemSig := metrics.DefaultInmemSignal(memSink) - defer func() { - if rerr != nil { - inMemSig.Stop() - } - }() + var ( + sink metrics.MetricSink + err error + ) + switch cfg.MetricsSink { + case MetricSinkStatsd: + sink, err = metrics.NewStatsdSink(cfg.StatsdAddr) + case MetricSinkDogsStatsd: + sink, err = datadog.NewDogStatsdSink(cfg.StatsdAddr, cfg.DatadogHostname) + default: + memSink := metrics.NewInmemSink(10*time.Second, time.Minute) + sink = memSink + inMemSig := metrics.DefaultInmemSignal(memSink) + defer func() { + if rerr != nil { + inMemSig.Stop() + } + }() + } + + if err != nil { + return nil, err + } - m := &Metrics{memSink: memSink} - fanout := metrics.FanoutSink{memSink} + m := &Metrics{sink: sink} + fanout := metrics.FanoutSink{sink} if cfg.PrometheusRetentionTime > 0 { m.prometheusEnabled = true @@ -140,6 +177,8 @@ func (m *Metrics) Gather(format string) (GatherResponse, error) { } } +// gatherPrometheus collects Prometheus metrics and returns a GatherResponse. +// If Prometheus metrics are not enabled, it returns an error. func (m *Metrics) gatherPrometheus() (GatherResponse, error) { if !m.prometheusEnabled { return GatherResponse{}, fmt.Errorf("prometheus metrics are not enabled") @@ -154,6 +193,7 @@ func (m *Metrics) gatherPrometheus() (GatherResponse, error) { defer buf.Reset() e := expfmt.NewEncoder(buf, expfmt.FmtText) + for _, mf := range metricsFamilies { if err := e.Encode(mf); err != nil { return GatherResponse{}, fmt.Errorf("failed to encode prometheus metrics: %w", err) @@ -163,8 +203,14 @@ func (m *Metrics) gatherPrometheus() (GatherResponse, error) { return GatherResponse{ContentType: string(expfmt.FmtText), Metrics: buf.Bytes()}, nil } +// gatherGeneric collects generic metrics and returns a GatherResponse. func (m *Metrics) gatherGeneric() (GatherResponse, error) { - summary, err := m.memSink.DisplayMetrics(nil, nil) + gm, ok := m.sink.(DisplayableSink) + if !ok { + return GatherResponse{}, fmt.Errorf("non in-memory metrics sink does not support generic format") + } + + summary, err := gm.DisplayMetrics(nil, nil) if err != nil { return GatherResponse{}, fmt.Errorf("failed to gather in-memory metrics: %w", err) } diff --git a/telemetry/metrics_test.go b/telemetry/metrics_test.go index 2d57fdf5b5b4..1ce1103529c8 100644 --- a/telemetry/metrics_test.go +++ b/telemetry/metrics_test.go @@ -19,6 +19,7 @@ func TestMetrics_Disabled(t *testing.T) { func TestMetrics_InMem(t *testing.T) { m, err := New(Config{ + MetricsSink: MetricSinkInMem, Enabled: true, EnableHostname: false, ServiceName: "test", @@ -42,6 +43,7 @@ func TestMetrics_InMem(t *testing.T) { func TestMetrics_Prom(t *testing.T) { m, err := New(Config{ + MetricsSink: MetricSinkInMem, Enabled: true, EnableHostname: false, ServiceName: "test", diff --git a/tests/go.mod b/tests/go.mod index 53252043775e..3f9590a4e0ba 100644 --- a/tests/go.mod +++ b/tests/go.mod @@ -43,6 +43,7 @@ require ( filippo.io/edwards25519 v1.0.0 // indirect github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.1 // indirect + github.com/DataDog/datadog-go v3.2.0+incompatible // indirect github.com/DataDog/zstd v1.5.5 // indirect github.com/aws/aws-sdk-go v1.44.224 // indirect github.com/beorn7/perks v1.0.1 // indirect diff --git a/tests/go.sum b/tests/go.sum index 244b5425c683..2ba3e48301ae 100644 --- a/tests/go.sum +++ b/tests/go.sum @@ -226,6 +226,7 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25 github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/DataDog/datadog-go v3.2.0+incompatible h1:qSG2N4FghB1He/r2mFrWKCaL7dXCilEuNEeAn20fdD4= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ= github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= diff --git a/tools/confix/data/v0.50-app.toml b/tools/confix/data/v0.50-app.toml index 08c8fd408c61..75f587576ea5 100644 --- a/tools/confix/data/v0.50-app.toml +++ b/tools/confix/data/v0.50-app.toml @@ -108,6 +108,17 @@ prometheus-retention-time = 0 # [["chain_id", "cosmoshub-1"]] global-labels = [] +# MetricsSink defines the type of metrics sink to use. +metrics-sink = "mem" + +# StatsdAddr defines the address of a statsd server to send metrics to. +# Only utilized if MetricsSink is set to "statsd" or "dogstatsd". +statsd-addr = "" + +# DatadogHostname defines the hostname to use when emitting metrics to +# Datadog. Only utilized if MetricsSink is set to "dogstatsd". +datadog-hostname = "" + ############################################################################### ### API Configuration ### ###############################################################################