diff --git a/CHANGELOG.md b/CHANGELOG.md index c735b8b95c1..febcf3e9341 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,11 +17,14 @@ We use _breaking :warning:_ to mark changes that are not backward compatible (re - [3919](https://github.com/thanos-io/thanos/pull/3919) Allow to disable automatically setting CORS headers using `--web.disable-cors` flag in each component that exposes an API. ### Fixed + - [#3204](https://github.com/thanos-io/thanos/pull/3204) Mixin: Use sidecar's metric timestamp for healthcheck. - [#3922](https://github.com/thanos-io/thanos/pull/3922) Fix panic in http logging middleware. - [#3937](https://github.com/thanos-io/thanos/pull/3937) Store: Fix race condition in chunk pool. ### Changed +- [#3948](https://github.com/thanos-io/thanos/pull/3948) Receiver: Adjust `http_request_duration_seconds` buckets for low latency requests. +- [#3856](https://github.com/thanos-io/thanos/pull/3856) Mixin: _breaking :warning:_ Introduce flexible multi-cluster/namespace mode for alerts and dashboards. Removes jobPrefix config option. Removes `namespace` by default. - [#3937](https://github.com/thanos-io/thanos/pull/3937) Store: Reduce memory usage for range queries. ### Removed @@ -37,6 +40,7 @@ We use _breaking :warning:_ to mark changes that are not backward compatible (re - [#3792](https://github.com/thanos-io/thanos/pull/3792) Receiver: Added `--tsdb.allow-overlapping-blocks` flag to allow overlapping tsdb blocks and enable vertical compaction - [#3031](https://github.com/thanos-io/thanos/pull/3031) Compact/Sidecar/other writers: added `--hash-func`. If some function has been specified, writers calculate hashes using that function of each file in a block before uploading them. If those hashes exist in the `meta.json` file then Compact does not download the files if they already exist on disk and with the same hash. 
This also means that the data directory passed to Thanos Compact is only *cleared once at boot* or *if everything succeeds*. So, if you, for example, use persistent volumes on k8s and your Thanos Compact crashes or fails to make an iteration properly then the last downloaded files are not wiped from the disk. The directories that were created the last time are only wiped again after a successful iteration or if the previously picked up blocks have disappeared. - [#3686](https://github.com/thanos-io/thanos/pull/3686) Query: Added federated metric metadata support. +- [#3846](https://github.com/thanos-io/thanos/pull/3846) Query: Added federated exemplars API support. ### Fixed diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 19b6a1c80bb..e8b532afa17 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -457,7 +457,7 @@ func runCompact( if conf.wait { r := route.New() - ins := extpromhttp.NewInstrumentationMiddleware(reg) + ins := extpromhttp.NewInstrumentationMiddleware(reg, nil) compactorView.Register(r, true, ins) global := ui.NewBucketUI(logger, conf.label, conf.webConf.externalPrefix, conf.webConf.prefixHeaderName, "/global", component) diff --git a/cmd/thanos/query.go b/cmd/thanos/query.go index 2da6f1085df..e97929d4643 100644 --- a/cmd/thanos/query.go +++ b/cmd/thanos/query.go @@ -32,6 +32,7 @@ import ( "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/discovery/cache" "github.com/thanos-io/thanos/pkg/discovery/dns" + "github.com/thanos-io/thanos/pkg/exemplars" "github.com/thanos-io/thanos/pkg/extgrpc" "github.com/thanos-io/thanos/pkg/extkingpin" "github.com/thanos-io/thanos/pkg/extprom" @@ -59,6 +60,7 @@ func registerQuery(app *extkingpin.App) { grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA := extkingpin.RegisterGRPCFlags(cmd) secure := cmd.Flag("grpc-client-tls-secure", "Use TLS when talking to the gRPC 
server").Default("false").Bool() + skipVerify := cmd.Flag("grpc-client-tls-skip-verify", "Disable TLS certificate verification i.e self signed, signed by fake CA").Default("false").Bool() cert := cmd.Flag("grpc-client-tls-cert", "TLS Certificates to use to identify this client to the server").Default("").String() key := cmd.Flag("grpc-client-tls-key", "TLS Key for the client's certificate").Default("").String() caCert := cmd.Flag("grpc-client-tls-ca", "TLS CA Certificates to use to verify gRPC servers").Default("").String() @@ -103,6 +105,9 @@ func registerQuery(app *extkingpin.App) { metadataEndpoints := cmd.Flag("metadata", "Experimental: Addresses of statically configured metadata API servers (repeatable). The scheme may be prefixed with 'dns+' or 'dnssrv+' to detect metadata API servers through respective DNS lookups."). Hidden().PlaceHolder("").Strings() + exemplarEndpoints := cmd.Flag("exemplar", "Experimental: Addresses of statically configured exemplars API servers (repeatable). The scheme may be prefixed with 'dns+' or 'dnssrv+' to detect exemplars API servers through respective DNS lookups."). + Hidden().PlaceHolder("").Strings() + strictStores := cmd.Flag("store-strict", "Addresses of only statically configured store API servers that are always used, even if the health check fails. Useful if you have a caching layer on top."). 
PlaceHolder("").Strings() @@ -159,6 +164,10 @@ func registerQuery(app *extkingpin.App) { return errors.Errorf("Address %s is duplicated for --metadata flag.", dup) } + if dup := firstDuplicate(*exemplarEndpoints); dup != "" { + return errors.Errorf("Address %s is duplicated for --exemplar flag.", dup) + } + httpLogOpts, err := logging.ParseHTTPOptions(*reqLogDecision, reqLogConfig) if err != nil { return errors.Wrap(err, "error while parsing config for request logging") @@ -200,6 +209,7 @@ func registerQuery(app *extkingpin.App) { *grpcKey, *grpcClientCA, *secure, + *skipVerify, *cert, *key, *caCert, @@ -223,6 +233,7 @@ func registerQuery(app *extkingpin.App) { *stores, *ruleEndpoints, *metadataEndpoints, + *exemplarEndpoints, *enableAutodownsampling, *enableQueryPartialResponse, *enableRulePartialResponse, @@ -256,6 +267,7 @@ func runQuery( grpcKey string, grpcClientCA string, secure bool, + skipVerify bool, cert string, key string, caCert string, @@ -279,6 +291,7 @@ func runQuery( storeAddrs []string, ruleAddrs []string, metadataAddrs []string, + exemplarAddrs []string, enableAutodownsampling bool, enableQueryPartialResponse bool, enableRulePartialResponse bool, @@ -299,7 +312,7 @@ func runQuery( Help: "The number of times a duplicated store addresses is detected from the different configs in query", }) - dialOpts, err := extgrpc.StoreClientGRPCOpts(logger, reg, tracer, secure, cert, key, caCert, serverName) + dialOpts, err := extgrpc.StoreClientGRPCOpts(logger, reg, tracer, secure, skipVerify, cert, key, caCert, serverName) if err != nil { return errors.Wrap(err, "building gRPC client") } @@ -329,6 +342,12 @@ func runQuery( dns.ResolverType(dnsSDResolver), ) + dnsExemplarProvider := dns.NewProvider( + logger, + extprom.WrapRegistererWithPrefix("thanos_query_exemplar_apis_", reg), + dns.ResolverType(dnsSDResolver), + ) + var ( stores = query.NewStoreSet( logger, @@ -362,12 +381,20 @@ func runQuery( return specs }, + func() (specs []query.ExemplarSpec) { + for _, 
addr := range dnsExemplarProvider.Addresses() { + specs = append(specs, query.NewGRPCStoreSpec(addr, false)) + } + + return specs + }, dialOpts, unhealthyStoreTimeout, ) proxy = store.NewProxyStore(logger, reg, stores.Get, component.Query, selectorLset, storeResponseTimeout) rulesProxy = rules.NewProxy(logger, stores.GetRulesClients) metadataProxy = metadata.NewProxy(logger, stores.GetMetadataClients) + exemplarsProxy = exemplars.NewProxy(logger, stores.GetExemplarsClients) queryableCreator = query.NewQueryableCreator( logger, extprom.WrapRegistererWithPrefix("thanos_query_", reg), @@ -457,6 +484,9 @@ func runQuery( if err := dnsMetadataProvider.Resolve(resolveCtx, metadataAddrs); err != nil { level.Error(logger).Log("msg", "failed to resolve addresses for metadataAPIs", "err", err) } + if err := dnsExemplarProvider.Resolve(resolveCtx, exemplarAddrs); err != nil { + level.Error(logger).Log("msg", "failed to resolve addresses for exemplarsAPI", "err", err) + } return nil }) }, func(error) { @@ -493,7 +523,7 @@ func runQuery( // Configure Request Logging for HTTP calls. logMiddleware := logging.NewHTTPServerMiddleware(logger, httpLogOpts...) - ins := extpromhttp.NewInstrumentationMiddleware(reg) + ins := extpromhttp.NewInstrumentationMiddleware(reg, nil) // TODO(bplotka in PR #513 review): pass all flags, not only the flags needed by prefix rewriting. ui.NewQueryUI(logger, stores, webExternalPrefix, webPrefixHeaderName).Register(router, ins) @@ -505,6 +535,7 @@ func runQuery( // NOTE: Will share the same replica label as the query for now. 
rules.NewGRPCClientWithDedup(rulesProxy, queryReplicaLabels), metadata.NewGRPCClient(metadataProxy), + exemplars.NewGRPCClientWithDedup(exemplarsProxy, queryReplicaLabels), enableAutodownsampling, enableQueryPartialResponse, enableRulePartialResponse, @@ -551,6 +582,7 @@ func runQuery( grpcserver.WithServer(store.RegisterStoreServer(proxy)), grpcserver.WithServer(rules.RegisterRulesServer(rulesProxy)), grpcserver.WithServer(metadata.RegisterMetadataServer(metadataProxy)), + grpcserver.WithServer(exemplars.RegisterExemplarsServer(exemplarsProxy)), grpcserver.WithListen(grpcBindAddr), grpcserver.WithGracePeriod(grpcGracePeriod), grpcserver.WithTLSConfig(tlsCfg), diff --git a/cmd/thanos/query_frontend.go b/cmd/thanos/query_frontend.go index b693e6fddae..68374b4500e 100644 --- a/cmd/thanos/query_frontend.go +++ b/cmd/thanos/query_frontend.go @@ -213,7 +213,7 @@ func runQueryFrontend( // Configure Request Logging for HTTP calls. logMiddleware := logging.NewHTTPServerMiddleware(logger, httpLogOpts...) - ins := extpromhttp.NewInstrumentationMiddleware(reg) + ins := extpromhttp.NewInstrumentationMiddleware(reg, nil) // Start metrics HTTP server. 
{ diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index 5a00e0ff79c..3d728f6bc1d 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -233,7 +233,7 @@ func runReceive( if err != nil { return err } - dialOpts, err := extgrpc.StoreClientGRPCOpts(logger, reg, tracer, rwServerCert != "", rwClientCert, rwClientKey, rwClientServerCA, rwClientServerName) + dialOpts, err := extgrpc.StoreClientGRPCOpts(logger, reg, tracer, rwServerCert != "", rwServerClientCA == "", rwClientCert, rwClientKey, rwClientServerCA, rwClientServerName) if err != nil { return err } diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 178452f9884..e92ec700470 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -621,7 +621,7 @@ func runRule( } }) - ins := extpromhttp.NewInstrumentationMiddleware(reg) + ins := extpromhttp.NewInstrumentationMiddleware(reg, nil) // Configure Request Logging for HTTP calls. logMiddleware := logging.NewHTTPServerMiddleware(logger, httpLogOpts...) diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index dd93addc855..b8318f9bf97 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -24,6 +24,7 @@ import ( "github.com/prometheus/prometheus/pkg/labels" "github.com/thanos-io/thanos/pkg/block/metadata" "github.com/thanos-io/thanos/pkg/component" + "github.com/thanos-io/thanos/pkg/exemplars" "github.com/thanos-io/thanos/pkg/extflag" "github.com/thanos-io/thanos/pkg/exthttp" "github.com/thanos-io/thanos/pkg/extkingpin" @@ -230,6 +231,7 @@ func runSidecar( grpcserver.WithServer(store.RegisterStoreServer(promStore)), grpcserver.WithServer(rules.RegisterRulesServer(rules.NewPrometheus(conf.prometheus.url, c, m.Labels))), grpcserver.WithServer(meta.RegisterMetadataServer(meta.NewPrometheus(conf.prometheus.url, c))), + grpcserver.WithServer(exemplars.RegisterExemplarsServer(exemplars.NewPrometheus(conf.prometheus.url, c, m.Labels))), 
grpcserver.WithListen(conf.grpc.bindAddress), grpcserver.WithGracePeriod(time.Duration(conf.grpc.gracePeriod)), grpcserver.WithTLSConfig(tlsCfg), diff --git a/cmd/thanos/store.go b/cmd/thanos/store.go index e91f52e8293..3538c94374a 100644 --- a/cmd/thanos/store.go +++ b/cmd/thanos/store.go @@ -407,7 +407,7 @@ func runStore( // Add bucket UI for loaded blocks. { r := route.New() - ins := extpromhttp.NewInstrumentationMiddleware(reg) + ins := extpromhttp.NewInstrumentationMiddleware(reg, nil) compactorView := ui.NewBucketUI(logger, "", externalPrefix, prefixHeader, "/loaded", component) compactorView.Register(r, true, ins) diff --git a/cmd/thanos/tools_bucket.go b/cmd/thanos/tools_bucket.go index 41b1abc4ffd..fa92ca739dd 100644 --- a/cmd/thanos/tools_bucket.go +++ b/cmd/thanos/tools_bucket.go @@ -342,7 +342,7 @@ func registerBucketWeb(app extkingpin.AppClause, objStoreConfig *extflag.PathOrC ) router := route.New() - ins := extpromhttp.NewInstrumentationMiddleware(reg) + ins := extpromhttp.NewInstrumentationMiddleware(reg, nil) bucketUI := ui.NewBucketUI(logger, *label, *webExternalPrefix, *webPrefixHeaderName, "", component.Bucket) bucketUI.Register(router, true, ins) diff --git a/docs/components/query.md b/docs/components/query.md index a8729089e6c..9a6a9267574 100644 --- a/docs/components/query.md +++ b/docs/components/query.md @@ -324,6 +324,9 @@ Flags: CA is specified, there is no client verification on server side. 
(tls.NoClientCert) --grpc-client-tls-secure Use TLS when talking to the gRPC server + --grpc-client-tls-skip-verify + Disable TLS certificate verification i.e self + signed, signed by fake CA --grpc-client-tls-cert="" TLS Certificates to use to identify this client to the server --grpc-client-tls-key="" TLS Key for the client's certificate diff --git a/examples/alerts/alerts.md b/examples/alerts/alerts.md index be63d66b026..9729019d105 100644 --- a/examples/alerts/alerts.md +++ b/examples/alerts/alerts.md @@ -10,17 +10,17 @@ name: thanos-compact rules: - alert: ThanosCompactMultipleRunning annotations: - description: No more than one Thanos Compact instance should be running at once. - There are {{ $value }} + description: 'No more than one Thanos Compact instance should be running at once. + There are {{$value}} ' runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactmultiplerunning summary: Thanos Compact has multiple instances running. - expr: sum(up{job=~"thanos-compact.*"}) > 1 + expr: sum by (job) (up{job=~"thanos-compact.*"}) > 1 for: 5m labels: severity: warning - alert: ThanosCompactHalted annotations: - description: Thanos Compact {{$labels.job}} has failed to run and now is halted. + description: Thanos Compact {{$labels.job}} has failed to run and now is halted. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthalted summary: Thanos Compact has failed to run ans is now halted. expr: thanos_compact_halted{job=~"thanos-compact.*"} == 1 @@ -29,8 +29,8 @@ rules: severity: warning - alert: ThanosCompactHighCompactionFailures annotations: - description: Thanos Compact {{$labels.job}} is failing to execute {{ $value | - humanize }}% of compactions. + description: Thanos Compact {{$labels.job}} , is failing to execute {{$value | + humanize}}% of compactions. 
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthighcompactionfailures summary: Thanos Compact is failing to execute compactions. expr: | @@ -45,8 +45,8 @@ rules: severity: warning - alert: ThanosCompactBucketHighOperationFailures annotations: - description: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value - | humanize }}% of operations. + description: Thanos Compact {{$labels.job}} , Bucket is failing to execute {{$value + | humanize}}% of operations. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactbuckethighoperationfailures summary: Thanos Compact Bucket is having a high number of operation failures. expr: | @@ -61,10 +61,11 @@ rules: severity: warning - alert: ThanosCompactHasNotRun annotations: - description: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours. + description: Thanos Compact {{$labels.job}} has not uploaded anything for 24 + hours. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthasnotrun summary: Thanos Compact has not uploaded anything for last 24 hours. - expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h]))) + expr: (time() - max by (job) (max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h]))) / 60 / 60 > 24 labels: severity: warning @@ -80,34 +81,34 @@ name: thanos-rule rules: - alert: ThanosRuleQueueIsDroppingAlerts annotations: - description: Thanos Rule {{$labels.job}} is failing to queue alerts. + description: Thanos Rule {{$labels.instance}} is failing to queue alerts. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulequeueisdroppingalerts summary: Thanos Rule is failing to queue alerts. 
expr: | - sum by (job) (rate(thanos_alert_queue_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 + sum by (job, instance) (rate(thanos_alert_queue_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 for: 5m labels: severity: critical - alert: ThanosRuleSenderIsFailingAlerts annotations: - description: Thanos Rule {{$labels.job}} is failing to send alerts to alertmanager. + description: Thanos Rule {{$labels.instance}} is failing to send alerts to alertmanager. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulesenderisfailingalerts summary: Thanos Rule is failing to send alerts to alertmanager. expr: | - sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 + sum by (job, instance) (rate(thanos_alert_sender_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 for: 5m labels: severity: critical - alert: ThanosRuleHighRuleEvaluationFailures annotations: - description: Thanos Rule {{$labels.job}} is failing to evaluate rules. + description: Thanos Rule {{$labels.instance}} is failing to evaluate rules. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulehighruleevaluationfailures summary: Thanos Rule is failing to evaluate rules. expr: | ( - sum by (job) (rate(prometheus_rule_evaluation_failures_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(prometheus_rule_evaluation_failures_total{job=~"thanos-rule.*"}[5m])) / - sum by (job) (rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[5m])) * 100 > 5 ) for: 5m @@ -115,18 +116,18 @@ rules: severity: critical - alert: ThanosRuleHighRuleEvaluationWarnings annotations: - description: Thanos Rule {{$labels.job}} has high number of evaluation warnings. + description: Thanos Rule {{$labels.instance}} has high number of evaluation warnings. 
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulehighruleevaluationwarnings summary: Thanos Rule has high number of evaluation warnings. expr: | - sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{job=~"thanos-rule.*"}[5m])) > 0 + sum by (job, instance) (rate(thanos_rule_evaluation_with_warnings_total{job=~"thanos-rule.*"}[5m])) > 0 for: 15m labels: severity: info - alert: ThanosRuleRuleEvaluationLatencyHigh annotations: - description: Thanos Rule {{$labels.job}}/{{$labels.instance}} has higher evaluation - latency than interval for {{$labels.rule_group}}. + description: Thanos Rule {{$labels.instance}} has higher evaluation latency than + interval for {{$labels.rule_group}}. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleruleevaluationlatencyhigh summary: Thanos Rule has high rule evaluation latency. expr: | @@ -140,15 +141,15 @@ rules: severity: warning - alert: ThanosRuleGrpcErrorRate annotations: - description: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize - }}% of requests. + description: Thanos Rule {{$labels.job}} is failing to handle {{$value | humanize}}% + of requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulegrpcerrorrate summary: Thanos Rule is failing to handle grpc requests. 
expr: | ( - sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-rule.*"}[5m])) / - sum by (job) (rate(grpc_server_started_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(grpc_server_started_total{job=~"thanos-rule.*"}[5m])) * 100 > 5 ) for: 5m @@ -159,22 +160,22 @@ rules: description: Thanos Rule {{$labels.job}} has not been able to reload its configuration. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleconfigreloadfailure summary: Thanos Rule has not been able to reload configuration. - expr: avg(thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) by (job) + expr: avg by (job, instance) (thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) != 1 for: 5m labels: severity: info - alert: ThanosRuleQueryHighDNSFailures annotations: - description: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing + description: Thanos Rule {{$labels.job}} has {{$value | humanize}}% of failing DNS queries for query endpoints. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulequeryhighdnsfailures summary: Thanos Rule is having high number of DNS failures. 
expr: | ( - sum by (job) (rate(thanos_rule_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(thanos_rule_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m])) / - sum by (job) (rate(thanos_rule_query_apis_dns_lookups_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(thanos_rule_query_apis_dns_lookups_total{job=~"thanos-rule.*"}[5m])) * 100 > 1 ) for: 15m @@ -182,15 +183,15 @@ rules: severity: warning - alert: ThanosRuleAlertmanagerHighDNSFailures annotations: - description: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing + description: Thanos Rule {{$labels.instance}} has {{$value | humanize}}% of failing DNS queries for Alertmanager endpoints. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulealertmanagerhighdnsfailures summary: Thanos Rule is having high number of DNS failures. expr: | ( - sum by (job) (rate(thanos_rule_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(thanos_rule_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m])) / - sum by (job) (rate(thanos_rule_alertmanagers_dns_lookups_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(thanos_rule_alertmanagers_dns_lookups_total{job=~"thanos-rule.*"}[5m])) * 100 > 1 ) for: 15m @@ -198,28 +199,28 @@ rules: severity: warning - alert: ThanosRuleNoEvaluationFor10Intervals annotations: - description: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups + description: Thanos Rule {{$labels.job}} has {{$value | humanize}}% rule groups that did not evaluate for at least 10x of their expected interval. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulenoevaluationfor10intervals summary: Thanos Rule has rule groups that did not evaluate for 10 intervals. 
expr: | - time() - max by (job, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"}) + time() - max by (job, instance, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"}) > - 10 * max by (job, group) (prometheus_rule_group_interval_seconds{job=~"thanos-rule.*"}) + 10 * max by (job, instance, group) (prometheus_rule_group_interval_seconds{job=~"thanos-rule.*"}) for: 5m labels: severity: info - alert: ThanosNoRuleEvaluations annotations: - description: Thanos Rule {{$labels.job}} did not perform any rule evaluations - in the past 2 minutes. + description: Thanos Rule {{$labels.instance}} did not perform any rule evaluations + in the past 10 minutes. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosnoruleevaluations summary: Thanos Rule did not perform any rule evaluations. expr: | - sum(rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[2m])) <= 0 + sum by (job, instance) (rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[5m])) <= 0 and - sum(thanos_rule_loaded_rules{job=~"thanos-rule.*"}) > 0 - for: 3m + sum by (job, instance) (thanos_rule_loaded_rules{job=~"thanos-rule.*"}) > 0 + for: 5m labels: severity: critical ``` @@ -232,8 +233,8 @@ name: thanos-store rules: - alert: ThanosStoreGrpcErrorRate annotations: - description: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize - }}% of requests. + description: Thanos Store {{$labels.job}} is failing to handle {{$value | humanize}}% + of requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoregrpcerrorrate summary: Thanos Store is failing to handle qrpcd requests. expr: | @@ -248,8 +249,8 @@ rules: severity: warning - alert: ThanosStoreSeriesGateLatencyHigh annotations: - description: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ - $value }} seconds for store series gate requests. 
+ description: Thanos Store {{$labels.job}} has a 99th percentile latency of {{$value}} + seconds for store series gate requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreseriesgatelatencyhigh summary: Thanos Store has high latency for store series gate requests. expr: | @@ -263,8 +264,8 @@ rules: severity: warning - alert: ThanosStoreBucketHighOperationFailures annotations: - description: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value - | humanize }}% of operations. + description: Thanos Store {{$labels.job}} Bucket is failing to execute {{$value + | humanize}}% of operations. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstorebuckethighoperationfailures summary: Thanos Store Bucket is failing to execute operations. expr: | @@ -280,7 +281,7 @@ rules: - alert: ThanosStoreObjstoreOperationLatencyHigh annotations: description: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency - of {{ $value }} seconds for the bucket operations. + of {{$value}} seconds for the bucket operations. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreobjstoreoperationlatencyhigh summary: Thanos Store is having high latency for bucket operations. expr: | @@ -302,34 +303,31 @@ name: thanos-sidecar rules: - alert: ThanosSidecarPrometheusDown annotations: - description: Thanos Sidecar {{$labels.job}} {{$labels.instance}} cannot connect - to Prometheus. + description: Thanos Sidecar {{$labels.instance}} cannot connect to Prometheus. 
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarprometheusdown summary: Thanos Sidecar cannot connect to Prometheus expr: | - sum by (job, instance) (thanos_sidecar_prometheus_up{job=~"thanos-sidecar.*"} == 0) + thanos_sidecar_prometheus_up{job=~"thanos-sidecar.*"} == 0 for: 5m labels: severity: critical - alert: ThanosSidecarBucketOperationsFailed annotations: - description: Thanos Sidecar {{$labels.job}} {{$labels.instance}} bucket operations - are failing + description: Thanos Sidecar {{$labels.instance}} bucket operations are failing runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed summary: Thanos Sidecar bucket operations are failing expr: | - rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-sidecar.*"}[5m]) > 0 + sum by (job, instance) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-sidecar.*"}[5m])) > 0 for: 5m labels: severity: critical - alert: ThanosSidecarUnhealthy annotations: - description: Thanos Sidecar {{$labels.job}} {{$labels.instance}} is unhealthy - for more than {{$value}} seconds. + description: Thanos Sidecar {{$labels.instance}} is unhealthy for {{$value}} seconds. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy summary: Thanos Sidecar is unhealthy. expr: | - time() - max by (job, instance) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"})) >= 240 + time() - max by (job, instance) (thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"}) >= 600 labels: severity: critical ``` @@ -342,38 +340,38 @@ name: thanos-query rules: - alert: ThanosQueryHttpRequestQueryErrorRateHigh annotations: - description: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize - }}% of "query" requests. 
+ description: Thanos Query {{$labels.job}} is failing to handle {{$value | humanize}}% + of "query" requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhttprequestqueryerrorratehigh summary: Thanos Query is failing to handle requests. expr: | ( - sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query"}[5m])) + sum by (job) (rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query"}[5m])) / - sum(rate(http_requests_total{job=~"thanos-query.*", handler="query"}[5m])) + sum by (job) (rate(http_requests_total{job=~"thanos-query.*", handler="query"}[5m])) ) * 100 > 5 for: 5m labels: severity: critical - alert: ThanosQueryHttpRequestQueryRangeErrorRateHigh annotations: - description: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize - }}% of "query_range" requests. + description: Thanos Query {{$labels.job}} is failing to handle {{$value | humanize}}% + of "query_range" requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhttprequestqueryrangeerrorratehigh summary: Thanos Query is failing to handle requests. expr: | ( - sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query_range"}[5m])) + sum by (job) (rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query_range"}[5m])) / - sum(rate(http_requests_total{job=~"thanos-query.*", handler="query_range"}[5m])) + sum by (job) (rate(http_requests_total{job=~"thanos-query.*", handler="query_range"}[5m])) ) * 100 > 5 for: 5m labels: severity: critical - alert: ThanosQueryGrpcServerErrorRate annotations: - description: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize - }}% of requests. + description: Thanos Query {{$labels.job}} is failing to handle {{$value | humanize}}% + of requests. 
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosquerygrpcservererrorrate summary: Thanos Query is failing to handle requests. expr: | @@ -388,8 +386,8 @@ rules: severity: warning - alert: ThanosQueryGrpcClientErrorRate annotations: - description: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize - }}% of requests. + description: Thanos Query {{$labels.job}} is failing to send {{$value | humanize}}% + of requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosquerygrpcclienterrorrate summary: Thanos Query is failing to send requests. expr: | @@ -403,7 +401,7 @@ rules: severity: warning - alert: ThanosQueryHighDNSFailures annotations: - description: Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing + description: Thanos Query {{$labels.job}} have {{$value | humanize}}% of failing DNS queries for store endpoints. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhighdnsfailures summary: Thanos Query is having high number of DNS failures. @@ -418,8 +416,8 @@ rules: severity: warning - alert: ThanosQueryInstantLatencyHigh annotations: - description: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ - $value }} seconds for instant queries. + description: Thanos Query {{$labels.job}} has a 99th percentile latency of {{$value}} + seconds for instant queries. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryinstantlatencyhigh summary: Thanos Query has high latency for queries. expr: | @@ -433,8 +431,8 @@ rules: severity: critical - alert: ThanosQueryRangeLatencyHigh annotations: - description: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ - $value }} seconds for range queries. 
+ description: Thanos Query {{$labels.job}} has a 99th percentile latency of {{$value}} + seconds for range queries. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryrangelatencyhigh summary: Thanos Query has high latency for queries. expr: | @@ -456,15 +454,15 @@ name: thanos-receive rules: - alert: ThanosReceiveHttpRequestErrorRateHigh annotations: - description: Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize - }}% of requests. + description: Thanos Receive {{$labels.job}} is failing to handle {{$value | humanize}}% + of requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehttprequesterrorratehigh summary: Thanos Receive is failing to handle requests. expr: | ( - sum(rate(http_requests_total{code=~"5..", job=~"thanos-receive.*", handler="receive"}[5m])) + sum by (job) (rate(http_requests_total{code=~"5..", job=~"thanos-receive.*", handler="receive"}[5m])) / - sum(rate(http_requests_total{job=~"thanos-receive.*", handler="receive"}[5m])) + sum by (job) (rate(http_requests_total{job=~"thanos-receive.*", handler="receive"}[5m])) ) * 100 > 5 for: 5m labels: @@ -486,8 +484,8 @@ rules: severity: critical - alert: ThanosReceiveHighReplicationFailures annotations: - description: Thanos Receive {{$labels.job}} is failing to replicate {{ $value - | humanize }}% of requests. + description: Thanos Receive {{$labels.job}} is failing to replicate {{$value | + humanize}}% of requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighreplicationfailures summary: Thanos Receive is having high number of replication failures. expr: | @@ -511,8 +509,8 @@ rules: severity: warning - alert: ThanosReceiveHighForwardRequestFailures annotations: - description: Thanos Receive {{$labels.job}} is failing to forward {{ $value | - humanize }}% of requests. 
+ description: Thanos Receive {{$labels.job}} is failing to forward {{$value | humanize}}% + of requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighforwardrequestfailures summary: Thanos Receive is failing to forward requests. expr: | @@ -527,7 +525,7 @@ rules: - alert: ThanosReceiveHighHashringFileRefreshFailures annotations: description: Thanos Receive {{$labels.job}} is failing to refresh hashring file, - {{ $value | humanize }} of attempts failed. + {{$value | humanize}} of attempts failed. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighhashringfilerefreshfailures summary: Thanos Receive is failing to refresh hasring file. expr: | @@ -546,21 +544,21 @@ rules: configurations. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceiveconfigreloadfailure summary: Thanos Receive has not been able to reload configuration. - expr: avg(thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"}) - by (job) != 1 + expr: avg by (job) (thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"}) + != 1 for: 5m labels: severity: warning - alert: ThanosReceiveNoUpload annotations: - description: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not - uploaded latest data to object storage. + description: Thanos Receive {{$labels.instance}} has not uploaded latest data + to object storage. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivenoupload summary: Thanos Receive has not uploaded latest data to object storage. 
expr: | (up{job=~"thanos-receive.*"} - 1) - + on (instance) # filters to only alert on current instance last 3h - (sum by (instance) (increase(thanos_shipper_uploads_total{job=~"thanos-receive.*"}[3h])) == 0) + + on (job, instance) # filters to only alert on current instance last 3h + (sum by (job, instance) (increase(thanos_shipper_uploads_total{job=~"thanos-receive.*"}[3h])) == 0) for: 3h labels: severity: critical @@ -572,27 +570,17 @@ rules: ```yaml name: thanos-bucket-replicate rules: -- alert: ThanosBucketReplicateIsDown - annotations: - description: Thanos Replicate has disappeared from Prometheus target discovery. - runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateisdown - summary: Thanos Replicate has disappeared from Prometheus target discovery. - expr: | - absent(up{job=~"thanos-bucket-replicate.*"}) - for: 5m - labels: - severity: critical - alert: ThanosBucketReplicateErrorRate annotations: - description: Thanos Replicate failing to run, {{ $value | humanize }}% of attempts + description: Thanos Replicate is failing to run, {{$value | humanize}}% of attempts failed. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateerrorrate - summary: Thanose Replicate is failing to run. + summary: Thanos Replicate is failing to run. 
expr: | ( - sum(rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-bucket-replicate.*"}[5m])) - / on (namespace) group_left - sum(rate(thanos_replicate_replication_runs_total{job=~"thanos-bucket-replicate.*"}[5m])) + sum by (job) (rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-bucket-replicate.*"}[5m])) + / on (job) group_left + sum by (job) (rate(thanos_replicate_replication_runs_total{job=~"thanos-bucket-replicate.*"}[5m])) ) * 100 >= 10 for: 5m labels: @@ -600,12 +588,12 @@ rules: - alert: ThanosBucketReplicateRunLatency annotations: description: Thanos Replicate {{$labels.job}} has a 99th percentile latency of - {{ $value }} seconds for the replicate operations. + {{$value}} seconds for the replicate operations. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicaterunlatency summary: Thanos Replicate has a high latency for replicate operations. expr: | ( - histogram_quantile(0.99, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m]))) > 20 + histogram_quantile(0.99, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m]))) > 20 and sum by (job) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m])) > 0 ) @@ -624,9 +612,10 @@ name: thanos-component-absent rules: - alert: ThanosBucketReplicateIsDown annotations: - description: ThanosBucketReplicate has disappeared from Prometheus target discovery. + description: ThanosBucketReplicate has disappeared. Prometheus target for the + component cannot be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. 
expr: | absent(up{job=~"thanos-bucket-replicate.*"} == 1) for: 5m @@ -634,9 +623,10 @@ rules: severity: critical - alert: ThanosCompactIsDown annotations: - description: ThanosCompact has disappeared from Prometheus target discovery. + description: ThanosCompact has disappeared. Prometheus target for the component + cannot be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. expr: | absent(up{job=~"thanos-compact.*"} == 1) for: 5m @@ -644,9 +634,10 @@ rules: severity: critical - alert: ThanosQueryIsDown annotations: - description: ThanosQuery has disappeared from Prometheus target discovery. + description: ThanosQuery has disappeared. Prometheus target for the component + cannot be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. expr: | absent(up{job=~"thanos-query.*"} == 1) for: 5m @@ -654,9 +645,10 @@ rules: severity: critical - alert: ThanosReceiveIsDown annotations: - description: ThanosReceive has disappeared from Prometheus target discovery. + description: ThanosReceive has disappeared. Prometheus target for the component + cannot be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceiveisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. expr: | absent(up{job=~"thanos-receive.*"} == 1) for: 5m @@ -664,9 +656,10 @@ rules: severity: critical - alert: ThanosRuleIsDown annotations: - description: ThanosRule has disappeared from Prometheus target discovery. + description: ThanosRule has disappeared. 
Prometheus target for the component cannot + be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. expr: | absent(up{job=~"thanos-rule.*"} == 1) for: 5m @@ -674,9 +667,10 @@ rules: severity: critical - alert: ThanosSidecarIsDown annotations: - description: ThanosSidecar has disappeared from Prometheus target discovery. + description: ThanosSidecar has disappeared. Prometheus target for the component + cannot be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. expr: | absent(up{job=~"thanos-sidecar.*"} == 1) for: 5m @@ -684,9 +678,10 @@ rules: severity: critical - alert: ThanosStoreIsDown annotations: - description: ThanosStore has disappeared from Prometheus target discovery. + description: ThanosStore has disappeared. Prometheus target for the component + cannot be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. expr: | absent(up{job=~"thanos-store.*"} == 1) for: 5m diff --git a/examples/alerts/alerts.yaml b/examples/alerts/alerts.yaml index 6d6e4a7a848..07b25c9d259 100644 --- a/examples/alerts/alerts.yaml +++ b/examples/alerts/alerts.yaml @@ -3,17 +3,17 @@ groups: rules: - alert: ThanosCompactMultipleRunning annotations: - description: No more than one Thanos Compact instance should be running at once. - There are {{ $value }} + description: 'No more than one Thanos Compact instance should be running at + once. 
There are {{$value}} ' runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactmultiplerunning summary: Thanos Compact has multiple instances running. - expr: sum(up{job=~"thanos-compact.*"}) > 1 + expr: sum by (job) (up{job=~"thanos-compact.*"}) > 1 for: 5m labels: severity: warning - alert: ThanosCompactHalted annotations: - description: Thanos Compact {{$labels.job}} has failed to run and now is halted. + description: Thanos Compact {{$labels.job}} has failed to run and now is halted. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthalted summary: Thanos Compact has failed to run ans is now halted. expr: thanos_compact_halted{job=~"thanos-compact.*"} == 1 @@ -22,8 +22,8 @@ groups: severity: warning - alert: ThanosCompactHighCompactionFailures annotations: - description: Thanos Compact {{$labels.job}} is failing to execute {{ $value - | humanize }}% of compactions. + description: Thanos Compact {{$labels.job}} is failing to execute {{$value + | humanize}}% of compactions. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthighcompactionfailures summary: Thanos Compact is failing to execute compactions. expr: | @@ -38,8 +38,8 @@ groups: severity: warning - alert: ThanosCompactBucketHighOperationFailures annotations: - description: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ - $value | humanize }}% of operations. + description: Thanos Compact {{$labels.job}} Bucket is failing to execute {{$value + | humanize}}% of operations. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactbuckethighoperationfailures summary: Thanos Compact Bucket is having a high number of operation failures. 
expr: | @@ -54,11 +54,11 @@ groups: severity: warning - alert: ThanosCompactHasNotRun annotations: - description: Thanos Compact {{$labels.job}} has not uploaded anything for 24 + description: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthasnotrun summary: Thanos Compact has not uploaded anything for last 24 hours. - expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h]))) + expr: (time() - max by (job) (max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h]))) / 60 / 60 > 24 labels: severity: warning @@ -66,38 +66,38 @@ groups: rules: - alert: ThanosQueryHttpRequestQueryErrorRateHigh annotations: - description: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize - }}% of "query" requests. + description: Thanos Query {{$labels.job}} is failing to handle {{$value | humanize}}% + of "query" requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhttprequestqueryerrorratehigh summary: Thanos Query is failing to handle requests. expr: | ( - sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query"}[5m])) + sum by (job) (rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query"}[5m])) / - sum(rate(http_requests_total{job=~"thanos-query.*", handler="query"}[5m])) + sum by (job) (rate(http_requests_total{job=~"thanos-query.*", handler="query"}[5m])) ) * 100 > 5 for: 5m labels: severity: critical - alert: ThanosQueryHttpRequestQueryRangeErrorRateHigh annotations: - description: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize - }}% of "query_range" requests. + description: Thanos Query {{$labels.job}} is failing to handle {{$value | humanize}}% + of "query_range" requests. 
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhttprequestqueryrangeerrorratehigh summary: Thanos Query is failing to handle requests. expr: | ( - sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query_range"}[5m])) + sum by (job) (rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query_range"}[5m])) / - sum(rate(http_requests_total{job=~"thanos-query.*", handler="query_range"}[5m])) + sum by (job) (rate(http_requests_total{job=~"thanos-query.*", handler="query_range"}[5m])) ) * 100 > 5 for: 5m labels: severity: critical - alert: ThanosQueryGrpcServerErrorRate annotations: - description: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize - }}% of requests. + description: Thanos Query {{$labels.job}} is failing to handle {{$value | humanize}}% + of requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosquerygrpcservererrorrate summary: Thanos Query is failing to handle requests. expr: | @@ -112,8 +112,8 @@ groups: severity: warning - alert: ThanosQueryGrpcClientErrorRate annotations: - description: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize - }}% of requests. + description: Thanos Query {{$labels.job}} is failing to send {{$value | humanize}}% + of requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosquerygrpcclienterrorrate summary: Thanos Query is failing to send requests. expr: | @@ -127,7 +127,7 @@ groups: severity: warning - alert: ThanosQueryHighDNSFailures annotations: - description: Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing + description: Thanos Query {{$labels.job}} have {{$value | humanize}}% of failing DNS queries for store endpoints. 
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhighdnsfailures summary: Thanos Query is having high number of DNS failures. @@ -142,8 +142,8 @@ groups: severity: warning - alert: ThanosQueryInstantLatencyHigh annotations: - description: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ - $value }} seconds for instant queries. + description: Thanos Query {{$labels.job}} has a 99th percentile latency of {{$value}} + seconds for instant queries. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryinstantlatencyhigh summary: Thanos Query has high latency for queries. expr: | @@ -157,8 +157,8 @@ groups: severity: critical - alert: ThanosQueryRangeLatencyHigh annotations: - description: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ - $value }} seconds for range queries. + description: Thanos Query {{$labels.job}} has a 99th percentile latency of {{$value}} + seconds for range queries. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryrangelatencyhigh summary: Thanos Query has high latency for queries. expr: | @@ -174,15 +174,15 @@ groups: rules: - alert: ThanosReceiveHttpRequestErrorRateHigh annotations: - description: Thanos Receive {{$labels.job}} is failing to handle {{ $value | - humanize }}% of requests. + description: Thanos Receive {{$labels.job}} is failing to handle {{$value | + humanize}}% of requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehttprequesterrorratehigh summary: Thanos Receive is failing to handle requests. 
expr: | ( - sum(rate(http_requests_total{code=~"5..", job=~"thanos-receive.*", handler="receive"}[5m])) + sum by (job) (rate(http_requests_total{code=~"5..", job=~"thanos-receive.*", handler="receive"}[5m])) / - sum(rate(http_requests_total{job=~"thanos-receive.*", handler="receive"}[5m])) + sum by (job) (rate(http_requests_total{job=~"thanos-receive.*", handler="receive"}[5m])) ) * 100 > 5 for: 5m labels: @@ -204,8 +204,8 @@ groups: severity: critical - alert: ThanosReceiveHighReplicationFailures annotations: - description: Thanos Receive {{$labels.job}} is failing to replicate {{ $value - | humanize }}% of requests. + description: Thanos Receive {{$labels.job}} is failing to replicate {{$value + | humanize}}% of requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighreplicationfailures summary: Thanos Receive is having high number of replication failures. expr: | @@ -229,8 +229,8 @@ groups: severity: warning - alert: ThanosReceiveHighForwardRequestFailures annotations: - description: Thanos Receive {{$labels.job}} is failing to forward {{ $value - | humanize }}% of requests. + description: Thanos Receive {{$labels.job}} is failing to forward {{$value | + humanize}}% of requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighforwardrequestfailures summary: Thanos Receive is failing to forward requests. expr: | @@ -245,7 +245,7 @@ groups: - alert: ThanosReceiveHighHashringFileRefreshFailures annotations: description: Thanos Receive {{$labels.job}} is failing to refresh hashring file, - {{ $value | humanize }} of attempts failed. + {{$value | humanize}} of attempts failed. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighhashringfilerefreshfailures summary: Thanos Receive is failing to refresh hasring file. expr: | @@ -264,21 +264,21 @@ groups: configurations. 
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceiveconfigreloadfailure summary: Thanos Receive has not been able to reload configuration. - expr: avg(thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"}) - by (job) != 1 + expr: avg by (job) (thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"}) + != 1 for: 5m labels: severity: warning - alert: ThanosReceiveNoUpload annotations: - description: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not - uploaded latest data to object storage. + description: Thanos Receive {{$labels.instance}} has not uploaded latest data + to object storage. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivenoupload summary: Thanos Receive has not uploaded latest data to object storage. expr: | (up{job=~"thanos-receive.*"} - 1) - + on (instance) # filters to only alert on current instance last 3h - (sum by (instance) (increase(thanos_shipper_uploads_total{job=~"thanos-receive.*"}[3h])) == 0) + + on (job, instance) # filters to only alert on current instance last 3h + (sum by (job, instance) (increase(thanos_shipper_uploads_total{job=~"thanos-receive.*"}[3h])) == 0) for: 3h labels: severity: critical @@ -286,42 +286,40 @@ groups: rules: - alert: ThanosSidecarPrometheusDown annotations: - description: Thanos Sidecar {{$labels.job}} {{$labels.instance}} cannot connect - to Prometheus. + description: Thanos Sidecar {{$labels.instance}} cannot connect to Prometheus. 
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarprometheusdown summary: Thanos Sidecar cannot connect to Prometheus expr: | - sum by (job, instance) (thanos_sidecar_prometheus_up{job=~"thanos-sidecar.*"} == 0) + thanos_sidecar_prometheus_up{job=~"thanos-sidecar.*"} == 0 for: 5m labels: severity: critical - alert: ThanosSidecarBucketOperationsFailed annotations: - description: Thanos Sidecar {{$labels.job}} {{$labels.instance}} bucket operations - are failing + description: Thanos Sidecar {{$labels.instance}} bucket operations are failing runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed summary: Thanos Sidecar bucket operations are failing expr: | - rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-sidecar.*"}[5m]) > 0 + sum by (job, instance) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-sidecar.*"}[5m])) > 0 for: 5m labels: severity: critical - alert: ThanosSidecarUnhealthy annotations: - description: Thanos Sidecar {{$labels.job}} {{$labels.instance}} is unhealthy - for more than {{$value}} seconds. + description: Thanos Sidecar {{$labels.instance}} is unhealthy for {{$value}} + seconds. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy summary: Thanos Sidecar is unhealthy. expr: | - time() - max by (job, instance) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"})) >= 240 + time() - max by (job, instance) (thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"}) >= 600 labels: severity: critical - name: thanos-store rules: - alert: ThanosStoreGrpcErrorRate annotations: - description: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize - }}% of requests. 
+ description: Thanos Store {{$labels.job}} is failing to handle {{$value | humanize}}% + of requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoregrpcerrorrate summary: Thanos Store is failing to handle qrpcd requests. expr: | @@ -336,8 +334,8 @@ groups: severity: warning - alert: ThanosStoreSeriesGateLatencyHigh annotations: - description: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ - $value }} seconds for store series gate requests. + description: Thanos Store {{$labels.job}} has a 99th percentile latency of {{$value}} + seconds for store series gate requests. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreseriesgatelatencyhigh summary: Thanos Store has high latency for store series gate requests. expr: | @@ -351,8 +349,8 @@ groups: severity: warning - alert: ThanosStoreBucketHighOperationFailures annotations: - description: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value - | humanize }}% of operations. + description: Thanos Store {{$labels.job}} Bucket is failing to execute {{$value + | humanize}}% of operations. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstorebuckethighoperationfailures summary: Thanos Store Bucket is failing to execute operations. expr: | @@ -368,7 +366,7 @@ groups: - alert: ThanosStoreObjstoreOperationLatencyHigh annotations: description: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency - of {{ $value }} seconds for the bucket operations. + of {{$value}} seconds for the bucket operations. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreobjstoreoperationlatencyhigh summary: Thanos Store is having high latency for bucket operations. 
expr: | @@ -384,34 +382,34 @@ groups: rules: - alert: ThanosRuleQueueIsDroppingAlerts annotations: - description: Thanos Rule {{$labels.job}} is failing to queue alerts. + description: Thanos Rule {{$labels.instance}} is failing to queue alerts. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulequeueisdroppingalerts summary: Thanos Rule is failing to queue alerts. expr: | - sum by (job) (rate(thanos_alert_queue_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 + sum by (job, instance) (rate(thanos_alert_queue_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 for: 5m labels: severity: critical - alert: ThanosRuleSenderIsFailingAlerts annotations: - description: Thanos Rule {{$labels.job}} is failing to send alerts to alertmanager. + description: Thanos Rule {{$labels.instance}} is failing to send alerts to alertmanager. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulesenderisfailingalerts summary: Thanos Rule is failing to send alerts to alertmanager. expr: | - sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 + sum by (job, instance) (rate(thanos_alert_sender_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 for: 5m labels: severity: critical - alert: ThanosRuleHighRuleEvaluationFailures annotations: - description: Thanos Rule {{$labels.job}} is failing to evaluate rules. + description: Thanos Rule {{$labels.instance}} is failing to evaluate rules. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulehighruleevaluationfailures summary: Thanos Rule is failing to evaluate rules. 
expr: | ( - sum by (job) (rate(prometheus_rule_evaluation_failures_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(prometheus_rule_evaluation_failures_total{job=~"thanos-rule.*"}[5m])) / - sum by (job) (rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[5m])) * 100 > 5 ) for: 5m @@ -419,18 +417,19 @@ groups: severity: critical - alert: ThanosRuleHighRuleEvaluationWarnings annotations: - description: Thanos Rule {{$labels.job}} has high number of evaluation warnings. + description: Thanos Rule {{$labels.instance}} has high number of evaluation + warnings. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulehighruleevaluationwarnings summary: Thanos Rule has high number of evaluation warnings. expr: | - sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{job=~"thanos-rule.*"}[5m])) > 0 + sum by (job, instance) (rate(thanos_rule_evaluation_with_warnings_total{job=~"thanos-rule.*"}[5m])) > 0 for: 15m labels: severity: info - alert: ThanosRuleRuleEvaluationLatencyHigh annotations: - description: Thanos Rule {{$labels.job}}/{{$labels.instance}} has higher evaluation - latency than interval for {{$labels.rule_group}}. + description: Thanos Rule {{$labels.instance}} has higher evaluation latency + than interval for {{$labels.rule_group}}. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleruleevaluationlatencyhigh summary: Thanos Rule has high rule evaluation latency. expr: | @@ -444,15 +443,15 @@ groups: severity: warning - alert: ThanosRuleGrpcErrorRate annotations: - description: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize - }}% of requests. + description: Thanos Rule {{$labels.job}} is failing to handle {{$value | humanize}}% + of requests. 
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulegrpcerrorrate summary: Thanos Rule is failing to handle grpc requests. expr: | ( - sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-rule.*"}[5m])) / - sum by (job) (rate(grpc_server_started_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(grpc_server_started_total{job=~"thanos-rule.*"}[5m])) * 100 > 5 ) for: 5m @@ -463,22 +462,22 @@ groups: description: Thanos Rule {{$labels.job}} has not been able to reload its configuration. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleconfigreloadfailure summary: Thanos Rule has not been able to reload configuration. - expr: avg(thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) by - (job) != 1 + expr: avg by (job, instance) (thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) + != 1 for: 5m labels: severity: info - alert: ThanosRuleQueryHighDNSFailures annotations: - description: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing + description: Thanos Rule {{$labels.job}} has {{$value | humanize}}% of failing DNS queries for query endpoints. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulequeryhighdnsfailures summary: Thanos Rule is having high number of DNS failures. 
expr: | ( - sum by (job) (rate(thanos_rule_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(thanos_rule_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m])) / - sum by (job) (rate(thanos_rule_query_apis_dns_lookups_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(thanos_rule_query_apis_dns_lookups_total{job=~"thanos-rule.*"}[5m])) * 100 > 1 ) for: 15m @@ -486,15 +485,15 @@ groups: severity: warning - alert: ThanosRuleAlertmanagerHighDNSFailures annotations: - description: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing - DNS queries for Alertmanager endpoints. + description: Thanos Rule {{$labels.instance}} has {{$value | humanize}}% of + failing DNS queries for Alertmanager endpoints. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulealertmanagerhighdnsfailures summary: Thanos Rule is having high number of DNS failures. expr: | ( - sum by (job) (rate(thanos_rule_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(thanos_rule_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m])) / - sum by (job) (rate(thanos_rule_alertmanagers_dns_lookups_total{job=~"thanos-rule.*"}[5m])) + sum by (job, instance) (rate(thanos_rule_alertmanagers_dns_lookups_total{job=~"thanos-rule.*"}[5m])) * 100 > 1 ) for: 15m @@ -502,53 +501,43 @@ groups: severity: warning - alert: ThanosRuleNoEvaluationFor10Intervals annotations: - description: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups + description: Thanos Rule {{$labels.job}} has {{$value | humanize}}% rule groups that did not evaluate for at least 10x of their expected interval. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulenoevaluationfor10intervals summary: Thanos Rule has rule groups that did not evaluate for 10 intervals. 
expr: | - time() - max by (job, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"}) + time() - max by (job, instance, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"}) > - 10 * max by (job, group) (prometheus_rule_group_interval_seconds{job=~"thanos-rule.*"}) + 10 * max by (job, instance, group) (prometheus_rule_group_interval_seconds{job=~"thanos-rule.*"}) for: 5m labels: severity: info - alert: ThanosNoRuleEvaluations annotations: - description: Thanos Rule {{$labels.job}} did not perform any rule evaluations - in the past 2 minutes. + description: Thanos Rule {{$labels.instance}} did not perform any rule evaluations + in the past 10 minutes. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosnoruleevaluations summary: Thanos Rule did not perform any rule evaluations. expr: | - sum(rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[2m])) <= 0 + sum by (job, instance) (rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[5m])) <= 0 and - sum(thanos_rule_loaded_rules{job=~"thanos-rule.*"}) > 0 - for: 3m + sum by (job, instance) (thanos_rule_loaded_rules{job=~"thanos-rule.*"}) > 0 + for: 5m labels: severity: critical - name: thanos-bucket-replicate rules: - - alert: ThanosBucketReplicateIsDown - annotations: - description: Thanos Replicate has disappeared from Prometheus target discovery. - runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateisdown - summary: Thanos Replicate has disappeared from Prometheus target discovery. - expr: | - absent(up{job=~"thanos-bucket-replicate.*"}) - for: 5m - labels: - severity: critical - alert: ThanosBucketReplicateErrorRate annotations: - description: Thanos Replicate failing to run, {{ $value | humanize }}% of attempts - failed. + description: Thanos Replicate is failing to run, {{$value | humanize}}% of + attempts failed. 
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateerrorrate - summary: Thanose Replicate is failing to run. + summary: Thanos Replicate is failing to run. expr: | ( - sum(rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-bucket-replicate.*"}[5m])) - / on (namespace) group_left - sum(rate(thanos_replicate_replication_runs_total{job=~"thanos-bucket-replicate.*"}[5m])) + sum by (job) (rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-bucket-replicate.*"}[5m])) + / on (job) group_left + sum by (job) (rate(thanos_replicate_replication_runs_total{job=~"thanos-bucket-replicate.*"}[5m])) ) * 100 >= 10 for: 5m labels: @@ -556,12 +545,12 @@ groups: - alert: ThanosBucketReplicateRunLatency annotations: description: Thanos Replicate {{$labels.job}} has a 99th percentile latency - of {{ $value }} seconds for the replicate operations. + of {{$value}} seconds for the replicate operations. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicaterunlatency summary: Thanos Replicate has a high latency for replicate operations. expr: | ( - histogram_quantile(0.99, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m]))) > 20 + histogram_quantile(0.99, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m]))) > 20 and sum by (job) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m])) > 0 ) @@ -572,9 +561,10 @@ groups: rules: - alert: ThanosBucketReplicateIsDown annotations: - description: ThanosBucketReplicate has disappeared from Prometheus target discovery. + description: ThanosBucketReplicate has disappeared. Prometheus target for the + component cannot be discovered. 
runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. expr: | absent(up{job=~"thanos-bucket-replicate.*"} == 1) for: 5m @@ -582,9 +572,10 @@ groups: severity: critical - alert: ThanosCompactIsDown annotations: - description: ThanosCompact has disappeared from Prometheus target discovery. + description: ThanosCompact has disappeared. Prometheus target for the component + cannot be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. expr: | absent(up{job=~"thanos-compact.*"} == 1) for: 5m @@ -592,9 +583,10 @@ groups: severity: critical - alert: ThanosQueryIsDown annotations: - description: ThanosQuery has disappeared from Prometheus target discovery. + description: ThanosQuery has disappeared. Prometheus target for the component + cannot be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. expr: | absent(up{job=~"thanos-query.*"} == 1) for: 5m @@ -602,9 +594,10 @@ groups: severity: critical - alert: ThanosReceiveIsDown annotations: - description: ThanosReceive has disappeared from Prometheus target discovery. + description: ThanosReceive has disappeared. Prometheus target for the component + cannot be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceiveisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. 
expr: | absent(up{job=~"thanos-receive.*"} == 1) for: 5m @@ -612,9 +605,10 @@ groups: severity: critical - alert: ThanosRuleIsDown annotations: - description: ThanosRule has disappeared from Prometheus target discovery. + description: ThanosRule has disappeared. Prometheus target for the component + cannot be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. expr: | absent(up{job=~"thanos-rule.*"} == 1) for: 5m @@ -622,9 +616,10 @@ groups: severity: critical - alert: ThanosSidecarIsDown annotations: - description: ThanosSidecar has disappeared from Prometheus target discovery. + description: ThanosSidecar has disappeared. Prometheus target for the component + cannot be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. expr: | absent(up{job=~"thanos-sidecar.*"} == 1) for: 5m @@ -632,9 +627,10 @@ groups: severity: critical - alert: ThanosStoreIsDown annotations: - description: ThanosStore has disappeared from Prometheus target discovery. + description: ThanosStore has disappeared. Prometheus target for the component + cannot be discovered. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreisdown - summary: thanos component has disappeared from Prometheus target discovery. + summary: Thanos component has disappeared. 
expr: | absent(up{job=~"thanos-store.*"} == 1) for: 5m diff --git a/examples/alerts/rules.yaml b/examples/alerts/rules.yaml index 1c7b7259821..a8b6ca64ec9 100644 --- a/examples/alerts/rules.yaml +++ b/examples/alerts/rules.yaml @@ -3,35 +3,35 @@ groups: rules: - expr: | ( - sum(rate(grpc_client_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-query.*", grpc_type="unary"}[5m])) + sum by (job) (rate(grpc_client_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-query.*", grpc_type="unary"}[5m])) / - sum(rate(grpc_client_started_total{job=~"thanos-query.*", grpc_type="unary"}[5m])) + sum by (job) (rate(grpc_client_started_total{job=~"thanos-query.*", grpc_type="unary"}[5m])) ) record: :grpc_client_failures_per_unary:sum_rate - expr: | ( - sum(rate(grpc_client_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-query.*", grpc_type="server_stream"}[5m])) + sum by (job) (rate(grpc_client_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-query.*", grpc_type="server_stream"}[5m])) / - sum(rate(grpc_client_started_total{job=~"thanos-query.*", grpc_type="server_stream"}[5m])) + sum by (job) (rate(grpc_client_started_total{job=~"thanos-query.*", grpc_type="server_stream"}[5m])) ) record: :grpc_client_failures_per_stream:sum_rate - expr: | ( - sum(rate(thanos_query_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m])) + sum by (job) (rate(thanos_query_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m])) / - sum(rate(thanos_query_store_apis_dns_lookups_total{job=~"thanos-query.*"}[5m])) + sum by (job) (rate(thanos_query_store_apis_dns_lookups_total{job=~"thanos-query.*"}[5m])) ) record: :thanos_query_store_apis_dns_failures_per_lookup:sum_rate - expr: | histogram_quantile(0.99, - 
sum(rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query"}[5m])) by (le) + sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query"}[5m])) ) labels: quantile: "0.99" record: :query_duration_seconds:histogram_quantile - expr: | histogram_quantile(0.99, - sum(rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query_range"}[5m])) by (le) + sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query_range"}[5m])) ) labels: quantile: "0.99" @@ -39,80 +39,80 @@ groups: - name: thanos-receive.rules rules: - expr: | - sum( - rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-receive.*", grpc_type="unary"}[5m]) + ( + sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-receive.*", grpc_type="unary"}[5m])) / - rate(grpc_server_started_total{job=~"thanos-receive.*", grpc_type="unary"}[5m]) + sum by (job) (rate(grpc_server_started_total{job=~"thanos-receive.*", grpc_type="unary"}[5m])) ) record: :grpc_server_failures_per_unary:sum_rate - expr: | - sum( - rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-receive.*", grpc_type="server_stream"}[5m]) + ( + sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-receive.*", grpc_type="server_stream"}[5m])) / - rate(grpc_server_started_total{job=~"thanos-receive.*", grpc_type="server_stream"}[5m]) + sum by (job) (rate(grpc_server_started_total{job=~"thanos-receive.*", grpc_type="server_stream"}[5m])) ) record: :grpc_server_failures_per_stream:sum_rate - expr: | - sum( - rate(http_requests_total{handler="receive", job=~"thanos-receive.*", code!~"5.."}[5m]) + ( + sum by 
(job) (rate(http_requests_total{handler="receive", job=~"thanos-receive.*", code!~"5.."}[5m])) / - rate(http_requests_total{handler="receive", job=~"thanos-receive.*"}[5m]) + sum by (job) (rate(http_requests_total{handler="receive", job=~"thanos-receive.*"}[5m])) ) record: :http_failure_per_request:sum_rate - expr: | histogram_quantile(0.99, - sum(rate(http_request_duration_seconds_bucket{handler="receive", job=~"thanos-receive.*"}[5m])) by (le) + sum by (job, le) (rate(http_request_duration_seconds_bucket{handler="receive", job=~"thanos-receive.*"}[5m])) ) labels: quantile: "0.99" record: :http_request_duration_seconds:histogram_quantile - expr: | ( - sum(rate(thanos_receive_replications_total{result="error", job=~"thanos-receive.*"}[5m])) + sum by (job) (rate(thanos_receive_replications_total{result="error", job=~"thanos-receive.*"}[5m])) / - sum(rate(thanos_receive_replications_total{job=~"thanos-receive.*"}[5m])) + sum by (job) (rate(thanos_receive_replications_total{job=~"thanos-receive.*"}[5m])) ) record: :thanos_receive_replication_failure_per_requests:sum_rate - expr: | ( - sum(rate(thanos_receive_forward_requests_total{result="error", job=~"thanos-receive.*"}[5m])) + sum by (job) (rate(thanos_receive_forward_requests_total{result="error", job=~"thanos-receive.*"}[5m])) / - sum(rate(thanos_receive_forward_requests_total{job=~"thanos-receive.*"}[5m])) + sum by (job) (rate(thanos_receive_forward_requests_total{job=~"thanos-receive.*"}[5m])) ) record: :thanos_receive_forward_failure_per_requests:sum_rate - expr: | ( - sum(rate(thanos_receive_hashrings_file_errors_total{job=~"thanos-receive.*"}[5m])) + sum by (job) (rate(thanos_receive_hashrings_file_errors_total{job=~"thanos-receive.*"}[5m])) / - sum(rate(thanos_receive_hashrings_file_refreshes_total{job=~"thanos-receive.*"}[5m])) + sum by (job) (rate(thanos_receive_hashrings_file_refreshes_total{job=~"thanos-receive.*"}[5m])) ) record: :thanos_receive_hashring_file_failure_per_refresh:sum_rate - name: 
thanos-store.rules rules: - expr: | ( - sum(rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-store.*", grpc_type="unary"}[5m])) + sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-store.*", grpc_type="unary"}[5m])) / - sum(rate(grpc_server_started_total{job=~"thanos-store.*", grpc_type="unary"}[5m])) + sum by (job) (rate(grpc_server_started_total{job=~"thanos-store.*", grpc_type="unary"}[5m])) ) record: :grpc_server_failures_per_unary:sum_rate - expr: | ( - sum(rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-store.*", grpc_type="server_stream"}[5m])) + sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-store.*", grpc_type="server_stream"}[5m])) / - sum(rate(grpc_server_started_total{job=~"thanos-store.*", grpc_type="server_stream"}[5m])) + sum by (job) (rate(grpc_server_started_total{job=~"thanos-store.*", grpc_type="server_stream"}[5m])) ) record: :grpc_server_failures_per_stream:sum_rate - expr: | ( - sum(rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-store.*"}[5m])) + sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-store.*"}[5m])) / - sum(rate(thanos_objstore_bucket_operations_total{job=~"thanos-store.*"}[5m])) + sum by (job) (rate(thanos_objstore_bucket_operations_total{job=~"thanos-store.*"}[5m])) ) record: :thanos_objstore_bucket_failures_per_operation:sum_rate - expr: | histogram_quantile(0.99, - sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~"thanos-store.*"}[5m])) by (le) + sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~"thanos-store.*"}[5m])) ) labels: quantile: "0.99" diff --git 
a/examples/alerts/tests.yaml b/examples/alerts/tests.yaml index d65135f9ee3..7aace50da6b 100644 --- a/examples/alerts/tests.yaml +++ b/examples/alerts/tests.yaml @@ -8,9 +8,9 @@ tests: - interval: 1m input_series: - series: 'thanos_sidecar_last_heartbeat_success_time_seconds{namespace="production", job="thanos-sidecar", instance="thanos-sidecar-0"}' - values: '5 10 43 17 11 _x5 0x10' + values: '5 10 43 17 11 0 0 0' - series: 'thanos_sidecar_last_heartbeat_success_time_seconds{namespace="production", job="thanos-sidecar", instance="thanos-sidecar-1"}' - values: '4 9 42 15 10 _x5 0x10' + values: '4 9 42 15 10 0 0 0' promql_expr_test: - expr: time() eval_time: 1m @@ -22,61 +22,145 @@ tests: exp_samples: - labels: '{}' value: 120 - - expr: time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"})) by (job, instance) - eval_time: 5m + - expr: max(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"}) by (job, instance) + eval_time: 2m exp_samples: - labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}' - value: 60 + value: 43 - labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}' - value: 60 - - expr: time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"})) by (job, instance) - eval_time: 6m + value: 42 + - expr: max(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"}) by (job, instance) + eval_time: 10m exp_samples: - labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}' - value: 120 + value: 0 - labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}' - value: 120 - - expr: time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"})) by (job, instance) - eval_time: 7m + value: 0 + - expr: max(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"}) by (job, instance) + eval_time: 11m + exp_samples: + - labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}' + 
value: 0 + - labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}' + value: 0 + - expr: time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"}) by (job, instance) + eval_time: 10m + exp_samples: + - labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}' + value: 600 + - labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}' + value: 600 + - expr: time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"}) by (job, instance) + eval_time: 11m exp_samples: - labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}' - value: 180 + value: 660 - labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}' - value: 180 - - expr: time() - max(timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"})) by (job, instance) - eval_time: 8m + value: 660 + - expr: time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job="thanos-sidecar"}) by (job, instance) >= 600 + eval_time: 12m exp_samples: - labels: '{job="thanos-sidecar", instance="thanos-sidecar-0"}' - value: 240 + value: 720 - labels: '{job="thanos-sidecar", instance="thanos-sidecar-1"}' - value: 240 + value: 720 + alert_rule_test: + - eval_time: 1m + alertname: ThanosSidecarUnhealthy + - eval_time: 2m + alertname: ThanosSidecarUnhealthy + - eval_time: 3m + alertname: ThanosSidecarUnhealthy + - eval_time: 10m + alertname: ThanosSidecarUnhealthy + exp_alerts: + - exp_labels: + severity: critical + job: thanos-sidecar + instance: thanos-sidecar-0 + exp_annotations: + description: 'Thanos Sidecar thanos-sidecar-0 is unhealthy for 600 seconds.' + runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy' + summary: 'Thanos Sidecar is unhealthy.' + - exp_labels: + severity: critical + job: thanos-sidecar + instance: thanos-sidecar-1 + exp_annotations: + description: 'Thanos Sidecar thanos-sidecar-1 is unhealthy for 600 seconds.' 
+ runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy' + summary: 'Thanos Sidecar is unhealthy.' + - eval_time: 11m + alertname: ThanosSidecarUnhealthy + exp_alerts: + - exp_labels: + severity: critical + job: thanos-sidecar + instance: thanos-sidecar-0 + exp_annotations: + description: 'Thanos Sidecar thanos-sidecar-0 is unhealthy for 660 seconds.' + runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy' + summary: 'Thanos Sidecar is unhealthy.' + - exp_labels: + severity: critical + job: thanos-sidecar + instance: thanos-sidecar-1 + exp_annotations: + description: 'Thanos Sidecar thanos-sidecar-1 is unhealthy for 660 seconds.' + runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy' + summary: 'Thanos Sidecar is unhealthy.' + - eval_time: 12m + alertname: ThanosSidecarUnhealthy + exp_alerts: + - exp_labels: + severity: critical + job: thanos-sidecar + instance: thanos-sidecar-0 + exp_annotations: + description: 'Thanos Sidecar thanos-sidecar-0 is unhealthy for 720 seconds.' + runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy' + summary: 'Thanos Sidecar is unhealthy.' + - exp_labels: + severity: critical + job: thanos-sidecar + instance: thanos-sidecar-1 + exp_annotations: + description: 'Thanos Sidecar thanos-sidecar-1 is unhealthy for 720 seconds.' + runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy' + summary: 'Thanos Sidecar is unhealthy.' 
+- interval: 1m + input_series: + - series: 'prometheus_rule_evaluations_total{namespace="production", job="thanos-ruler", instance="thanos-ruler-0"}' + values: '5x7 0x8' + - series: 'prometheus_rule_evaluations_total{namespace="production", job="thanos-ruler", instance="thanos-ruler-1"}' + values: '5x7 0x8' + - series: 'thanos_rule_loaded_rules{namespace="production", job="thanos-ruler", instance="thanos-ruler-0"}' + values: '0x5 5x10' + - series: 'thanos_rule_loaded_rules{namespace="production", job="thanos-ruler", instance="thanos-ruler-1"}' + values: '0x5 5x10' alert_rule_test: - eval_time: 1m - alertname: ThanosSidecarUnhealthy - - eval_time: 2m - alertname: ThanosSidecarUnhealthy - - eval_time: 3m - alertname: ThanosSidecarUnhealthy + alertname: ThanosNoRuleEvaluations - eval_time: 5m - alertname: ThanosSidecarUnhealthy - - eval_time: 8m - alertname: ThanosSidecarUnhealthy + alertname: ThanosNoRuleEvaluations + - eval_time: 6m + alertname: ThanosNoRuleEvaluations + - eval_time: 11m + alertname: ThanosNoRuleEvaluations exp_alerts: - exp_labels: severity: critical - job: thanos-sidecar - instance: thanos-sidecar-0 + job: thanos-ruler + instance: thanos-ruler-0 exp_annotations: - description: 'Thanos Sidecar thanos-sidecar thanos-sidecar-0 is unhealthy for more than 240 seconds.' - runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy' - summary: 'Thanos Sidecar is unhealthy.' + description: 'Thanos Rule thanos-ruler-0 did not perform any rule evaluations in the past 10 minutes.' + runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosnoruleevaluations' + summary: 'Thanos Rule did not perform any rule evaluations.' 
- exp_labels: severity: critical - job: thanos-sidecar - instance: thanos-sidecar-1 + job: thanos-ruler + instance: thanos-ruler-1 exp_annotations: - description: 'Thanos Sidecar thanos-sidecar thanos-sidecar-1 is unhealthy for more than 240 seconds.' - runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy' - summary: 'Thanos Sidecar is unhealthy.' - - eval_time: 10m - alertname: ThanosSidecarUnhealthy + description: 'Thanos Rule thanos-ruler-1 did not perform any rule evaluations in the past 10 minutes.' + runbook_url: 'https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosnoruleevaluations' + summary: 'Thanos Rule did not perform any rule evaluations.' diff --git a/examples/dashboards/bucket_replicate.json b/examples/dashboards/bucket_replicate.json index 4d6d339bb72..1c3ecb00766 100644 --- a/examples/dashboards/bucket_replicate.json +++ b/examples/dashboards/bucket_replicate.json @@ -47,7 +47,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_replicate_replication_runs_total{result=\"error\", namespace=\"$namespace\",job=~\"thanos-bucket-replicate.*\"}[$interval])) / sum(rate(thanos_replicate_replication_runs_total{namespace=\"$namespace\",job=~\"thanos-bucket-replicate.*\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_replicate_replication_runs_total{result=\"error\", job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_replicate_replication_runs_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -124,7 +124,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_replicate_replication_runs_total{result=\"error\", namespace=\"$namespace\",job=~\"thanos-bucket-replicate.*\"}[$interval])) by (result)", + "expr": "sum by (job, result) (rate(thanos_replicate_replication_runs_total{result=\"error\", job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, 
"legendFormat": "{{result}}", @@ -194,34 +194,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_replicate_replication_run_duration_seconds_bucket{result=\"success\", namespace=\"$namespace\",job=~\"thanos-bucket-replicate.*\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{result=\"success\", job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}}", + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, "refId": "A", "step": 10 }, { - "expr": "sum(rate(thanos_replicate_replication_run_duration_seconds_sum{result=\"success\", namespace=\"$namespace\",job=~\"thanos-bucket-replicate.*\"}[$interval])) by (job) * 1 / sum(rate(thanos_replicate_replication_run_duration_seconds_count{result=\"success\", namespace=\"$namespace\",job=~\"thanos-bucket-replicate.*\"}[$interval])) by (job)", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{result=\"success\", job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}}", - "refId": "B", + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_replicate_replication_run_duration_seconds_bucket{result=\"success\", namespace=\"$namespace\",job=~\"thanos-bucket-replicate.*\"}[$interval])) by 
(job, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{result=\"success\", job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}}", - "refId": "C", + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -305,7 +333,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(blocks_meta_synced{state=\"loaded\",namespace=\"$namespace\",job=~\"thanos-bucket-replicate.*\"}[$interval]))", + "expr": "sum by (job) (rate(blocks_meta_synced{state=\"loaded\", job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "meta loads", @@ -313,7 +341,7 @@ "step": 10 }, { - "expr": "sum(rate(blocks_meta_synced{state=\"failed\",namespace=\"$namespace\",job=~\"thanos-bucket-replicate.*\"}[$interval]))", + "expr": "sum by (job) (rate(blocks_meta_synced{state=\"failed\", job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "partial meta reads", @@ -321,7 +349,7 @@ "step": 10 }, { - "expr": "sum(rate(thanos_replicate_blocks_already_replicated_total{namespace=\"$namespace\",job=~\"thanos-bucket-replicate.*\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_replicate_blocks_already_replicated_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "already replicated blocks", @@ -329,7 +357,7 @@ "step": 10 }, { - "expr": "sum(rate(thanos_replicate_blocks_replicated_total{namespace=\"$namespace\",job=~\"thanos-bucket-replicate.*\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_replicate_blocks_replicated_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "replicated blocks", @@ -337,7 +365,7 @@ "step": 10 }, { - "expr": 
"sum(rate(thanos_replicate_objects_replicated_total{namespace=\"$namespace\",job=~\"thanos-bucket-replicate.*\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_replicate_objects_replicated_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "replicated objects", @@ -399,8 +427,8 @@ "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -412,27 +440,22 @@ "type": "datasource" }, { - "allValue": null, - "current": { }, - "datasource": "$datasource", + "auto": true, + "auto_count": 300, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(thanos_status{}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "label": "interval", + "name": "interval", + "query": "5m,10m,30m,1h,6h,12h", + "refresh": 2, + "type": "interval" }, { - "allValue": "thanos-bucket-replicate.*", + "allValue": null, "current": { "text": "all", "value": "$__all" @@ -444,7 +467,7 @@ "multi": false, "name": "job", "options": [ ], - "query": "label_values(up{namespace=\"$namespace\", job=~\"thanos-bucket-replicate.*\"}, job)", + "query": "label_values(up{job=~\"thanos-bucket-replicate.*\"}, job)", "refresh": 1, "regex": "", "sort": 2, @@ -453,21 +476,6 @@ "tagsQuery": "", "type": "query", "useTags": false - }, - { - "auto": true, - "auto_count": 300, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "hide": 0, - "label": "interval", - "name": "interval", - "query": "5m,10m,30m,1h,6h,12h", - "refresh": 2, - "type": "interval" } ] }, diff --git a/examples/dashboards/compact.json b/examples/dashboards/compact.json index 99653ecf00a..db0adb4dc6c 100644 --- 
a/examples/dashboards/compact.json +++ b/examples/dashboards/compact.json @@ -46,7 +46,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_compact_group_compactions_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, group)", + "expr": "sum by (job, group) (rate(thanos_compact_group_compactions_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "compaction {{job}} {{group}}", @@ -125,7 +125,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_compact_group_compactions_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_compact_group_compactions_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_compact_group_compactions_failures_total{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_compact_group_compactions_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -214,7 +214,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_compact_downsample_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, group)", + "expr": "sum by (job, group) (rate(thanos_compact_downsample_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "downsample {{job}} {{group}}", @@ -293,7 +293,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_compact_downsample_failed_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_compact_downsample_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_compact_downsample_failed_total{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_compact_downsample_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -382,7 +382,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(thanos_compact_garbage_collection_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) (rate(thanos_compact_garbage_collection_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "garbage collection {{job}}", @@ -461,7 +461,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_compact_garbage_collection_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_compact_garbage_collection_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_compact_garbage_collection_failures_total{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_compact_garbage_collection_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -531,34 +531,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_compact_garbage_collection_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(thanos_compact_garbage_collection_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}}", + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, "refId": "A", "step": 10 }, { - "expr": "sum(rate(thanos_compact_garbage_collection_duration_seconds_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) * 1 / 
sum(rate(thanos_compact_garbage_collection_duration_seconds_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(thanos_compact_garbage_collection_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}}", - "refId": "B", + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_compact_garbage_collection_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_compact_garbage_collection_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}}", - "refId": "C", + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -643,7 +671,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_compact_blocks_cleaned_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) (rate(thanos_compact_blocks_cleaned_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Blocks cleanup {{job}}", @@ -720,7 +748,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_compact_block_cleanup_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) (rate(thanos_compact_block_cleanup_failures_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Blocks cleanup failures {{job}}", @@ -797,7 +825,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_compact_blocks_marked_for_deletion_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) 
(rate(thanos_compact_blocks_marked_for_deletion_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Blocks marked {{job}}", @@ -886,7 +914,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_blocks_meta_syncs_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) (rate(thanos_blocks_meta_syncs_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "sync {{job}}", @@ -965,7 +993,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_blocks_meta_sync_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_blocks_meta_syncs_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_blocks_meta_sync_failures_total{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_blocks_meta_syncs_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1035,34 +1063,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_blocks_meta_sync_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(thanos_blocks_meta_sync_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}}", + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, "refId": "A", "step": 10 }, { - "expr": 
"sum(rate(thanos_blocks_meta_sync_duration_seconds_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) * 1 / sum(rate(thanos_blocks_meta_sync_duration_seconds_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(thanos_blocks_meta_sync_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}}", - "refId": "B", + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_blocks_meta_sync_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_blocks_meta_sync_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}}", - "refId": "C", + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -1147,7 +1203,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_objstore_bucket_operations_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, operation)", + "expr": "sum by (job, operation) (rate(thanos_objstore_bucket_operations_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{operation}}", @@ -1226,7 +1282,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_objstore_bucket_operations_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_objstore_bucket_operations_total{job=\"$job\"}[$interval]))", "format": 
"time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1296,34 +1352,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}}", + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, "refId": "A", "step": 10 }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) * 1 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}}", - "refId": "B", + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", 
"format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}}", - "refId": "C", + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -1407,7 +1491,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_alloc_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc all {{instance}}", @@ -1415,7 +1499,7 @@ "step": 10 }, { - "expr": "go_memstats_heap_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_heap_alloc_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc heap {{instance}}", @@ -1423,7 +1507,7 @@ "step": 10 }, { - "expr": "rate(go_memstats_alloc_bytes_total{namespace=\"$namespace\",job=~\"$job\"}[30s])", + "expr": "rate(go_memstats_alloc_bytes_total{job=\"$job\"}[30s])", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc rate all {{instance}}", @@ -1431,7 +1515,7 @@ "step": 10 }, { - "expr": "rate(go_memstats_heap_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}[30s])", + "expr": "rate(go_memstats_heap_alloc_bytes{job=\"$job\"}[30s])", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc rate heap {{instance}}", @@ -1439,18 +1523,18 @@ "step": 10 }, { - "expr": "go_memstats_stack_inuse_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_stack_inuse_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inuse stack {{instance}}", + "legendFormat": "inuse stack {{instance}}", "legendLink": null, "step": 10 }, { - "expr": "go_memstats_heap_inuse_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_heap_inuse_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inuse heap {{instance}}", + "legendFormat": "inuse heap {{instance}}", "legendLink": null, "step": 10 } @@ 
-1523,7 +1607,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_goroutines{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_goroutines{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -1599,7 +1683,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_gc_duration_seconds{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_gc_duration_seconds{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{quantile}} {{instance}}", @@ -1661,8 +1745,8 @@ "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -1674,27 +1758,22 @@ "type": "datasource" }, { - "allValue": null, - "current": { }, - "datasource": "$datasource", + "auto": true, + "auto_count": 300, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(thanos_status{}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "label": "interval", + "name": "interval", + "query": "5m,10m,30m,1h,6h,12h", + "refresh": 2, + "type": "interval" }, { - "allValue": "thanos-compact.*", + "allValue": null, "current": { "text": "all", "value": "$__all" @@ -1706,7 +1785,7 @@ "multi": false, "name": "job", "options": [ ], - "query": "label_values(up{namespace=\"$namespace\", job=~\"thanos-compact.*\"}, job)", + "query": "label_values(up{job=~\"thanos-compact.*\"}, job)", "refresh": 1, "regex": "", "sort": 2, @@ -1715,21 +1794,6 @@ "tagsQuery": "", "type": "query", "useTags": false - }, - { - "auto": true, - "auto_count": 300, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "hide": 0, - "label": "interval", - "name": "interval", - "query": "5m,10m,30m,1h,6h,12h", - 
"refresh": 2, - "type": "interval" } ] }, diff --git a/examples/dashboards/overview.json b/examples/dashboards/overview.json index d8e7c530cf9..36ff7107440 100644 --- a/examples/dashboards/overview.json +++ b/examples/dashboards/overview.json @@ -14,15 +14,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "error": "#E24D42", - "success": "#7EB26D" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -55,17 +47,38 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/1../", + "color": "#EAB839" + }, + { + "alias": "/2../", + "color": "#37872D" + }, + { + "alias": "/3../", + "color": "#E0B400" + }, + { + "alias": "/4../", + "color": "#1F60C4" + }, + { + "alias": "/5../", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(label_replace(rate(http_requests_total{namespace=\"$namespace\",job=~\"thanos-query.*\",handler=\"query\"}[$interval]),\"status_code\", \"${1}xx\", \"code\", \"([0-9])..\")) by (job, handler, status_code)", + "expr": "sum by (job, handler, code) (rate(http_requests_total{handler=\"query\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{handler}} {{status_code}}", + "legendFormat": "{{job}} {{handler}} {{code}}", "refId": "A", "step": 10 } @@ -149,7 +162,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(http_requests_total{namespace=\"$namespace\",job=~\"thanos-query.*\",handler=\"query\",code=~\"5..\"}[$interval])) / sum(rate(http_requests_total{namespace=\"$namespace\",job=~\"thanos-query.*\",handler=\"query\"}[$interval]))", + "expr": "sum by (job) (rate(http_requests_total{handler=\"query\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{handler=\"query\"}[$interval]))", "format": "time_series", 
"intervalFactor": 2, "legendFormat": "error", @@ -234,7 +247,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{namespace=\"$namespace\",job=~\"thanos-query.*\",handler=\"query\"}[$interval])) by (job, le))", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{handler=\"query\"}[$interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} P99", @@ -308,15 +321,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "error": "#E24D42", - "success": "#7EB26D" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -349,17 +354,38 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/1../", + "color": "#EAB839" + }, + { + "alias": "/2../", + "color": "#37872D" + }, + { + "alias": "/3../", + "color": "#E0B400" + }, + { + "alias": "/4../", + "color": "#1F60C4" + }, + { + "alias": "/5../", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(label_replace(rate(http_requests_total{namespace=\"$namespace\",job=~\"thanos-query.*\",handler=\"query_range\"}[$interval]),\"status_code\", \"${1}xx\", \"code\", \"([0-9])..\")) by (job, handler, status_code)", + "expr": "sum by (job, handler, code) (rate(http_requests_total{handler=\"query_range\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{handler}} {{status_code}}", + "legendFormat": "{{job}} {{handler}} {{code}}", "refId": "A", "step": 10 } @@ -443,7 +469,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(http_requests_total{namespace=\"$namespace\",job=~\"thanos-query.*\",handler=\"query_range\",code=~\"5..\"}[$interval])) / 
sum(rate(http_requests_total{namespace=\"$namespace\",job=~\"thanos-query.*\",handler=\"query_range\"}[$interval]))", + "expr": "sum by (job) (rate(http_requests_total{handler=\"query_range\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{handler=\"query_range\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -528,7 +554,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{namespace=\"$namespace\",job=~\"thanos-query.*\",handler=\"query_range\"}[$interval])) by (job, le))", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{handler=\"query_range\"}[$interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} P99", @@ -602,26 +628,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - "AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - "Unauthenticated": "#EF843C", - "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -654,14 +661,87 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": "/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", 
+ "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", + "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + "color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + { + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + "color": "#C4162A" + }, + { + "alias": "/Unknown/", + "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{namespace=\"$namespace\",job=~\"thanos-store.*\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, grpc_method, grpc_code) (rate(grpc_server_handled_total{grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -748,7 +828,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"thanos-store.*\",grpc_type=\"unary\"}[$interval])) / sum(rate(grpc_server_started_total{namespace=\"$namespace\",job=~\"thanos-store.*\",grpc_type=\"unary\"}[$interval]))", + "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",grpc_type=\"unary\"}[$interval])) / sum by (job) (rate(grpc_server_started_total{grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -833,7 +913,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, 
sum(rate(grpc_server_handling_seconds_bucket{grpc_type=\"unary\",namespace=\"$namespace\",job=~\"thanos-store.*\"}[$interval])) by (job, le))", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{grpc_type=\"unary\"}[$interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} P99", @@ -907,26 +987,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - "AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - "Unauthenticated": "#EF843C", - "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -959,14 +1020,87 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": "/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", + "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", + "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + "color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + { + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + 
"color": "#C4162A" + }, + { + "alias": "/Unknown/", + "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{namespace=\"$namespace\",job=~\"thanos-sidecar.*\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, grpc_method, grpc_code) (rate(grpc_server_handled_total{grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -1053,7 +1187,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"thanos-sidecar.*\",grpc_type=\"unary\"}[$interval])) / sum(rate(grpc_server_started_total{namespace=\"$namespace\",job=~\"thanos-sidecar.*\",grpc_type=\"unary\"}[$interval]))", + "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",grpc_type=\"unary\"}[$interval])) / sum by (job) (rate(grpc_server_started_total{grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1138,7 +1272,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{grpc_type=\"unary\",namespace=\"$namespace\",job=~\"thanos-sidecar.*\"}[$interval])) by (job, le))", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{grpc_type=\"unary\"}[$interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} P99", @@ -1212,15 +1346,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "error": "#E24D42", - 
"success": "#7EB26D" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -1253,17 +1379,38 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/1../", + "color": "#EAB839" + }, + { + "alias": "/2../", + "color": "#37872D" + }, + { + "alias": "/3../", + "color": "#E0B400" + }, + { + "alias": "/4../", + "color": "#1F60C4" + }, + { + "alias": "/5../", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(label_replace(rate(http_requests_total{handler=\"receive\",namespace=\"$namespace\",job=~\"thanos-receive.*\"}[$interval]),\"status_code\", \"${1}xx\", \"code\", \"([0-9])..\")) by (job, handler, status_code)", + "expr": "sum by (job, handler, code) (rate(http_requests_total{handler=\"receive\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{handler}} {{status_code}}", + "legendFormat": "{{job}} {{handler}} {{code}}", "refId": "A", "step": 10 } @@ -1347,7 +1494,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(http_requests_total{handler=\"receive\",namespace=\"$namespace\",job=~\"thanos-receive.*\",code=~\"5..\"}[$interval])) / sum(rate(http_requests_total{handler=\"receive\",namespace=\"$namespace\",job=~\"thanos-receive.*\"}[$interval]))", + "expr": "sum by (job) (rate(http_requests_total{handler=\"receive\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{handler=\"receive\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1432,7 +1579,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{handler=\"receive\",namespace=\"$namespace\",job=~\"thanos-receive.*\"}[$interval])) by (job, le))", + "expr": "histogram_quantile(0.99, sum by (job, le) 
(rate(http_request_duration_seconds_bucket{handler=\"receive\"}[$interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} P99", @@ -1546,7 +1693,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_alert_sender_alerts_sent_total{namespace=\"$namespace\",job=~\"thanos-rule.*\"}[$interval])) by (job, alertmanager)", + "expr": "sum by (job, alertmanager) (rate(thanos_alert_sender_alerts_sent_total{}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{alertmanager}}", @@ -1633,7 +1780,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_alert_sender_errors_total{namespace=\"$namespace\",job=~\"thanos-rule.*\"}[$interval])) / sum(rate(thanos_alert_sender_alerts_sent_total{namespace=\"$namespace\",job=~\"thanos-rule.*\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_alert_sender_errors_total{}[$interval])) / sum by (job) (rate(thanos_alert_sender_alerts_sent_total{}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1718,7 +1865,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_alert_sender_latency_seconds_bucket{namespace=\"$namespace\",job=~\"thanos-rule.*\"}[$interval])) by (job, le))", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_alert_sender_latency_seconds_bucket{}[$interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} P99", @@ -1832,7 +1979,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_compact_group_compactions_total{namespace=\"$namespace\",job=~\"thanos-compact.*\"}[$interval])) by (job)", + "expr": "sum by (job) (rate(thanos_compact_group_compactions_total{}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "compaction {{job}}", @@ -1919,7 +2066,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(thanos_compact_group_compactions_failures_total{namespace=\"$namespace\",job=~\"thanos-compact.*\"}[$interval])) / sum(rate(thanos_compact_group_compactions_total{namespace=\"$namespace\",job=~\"thanos-compact.*\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_compact_group_compactions_failures_total{}[$interval])) / sum by (job) (rate(thanos_compact_group_compactions_total{}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1981,8 +2128,8 @@ "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -1993,26 +2140,6 @@ "regex": "", "type": "datasource" }, - { - "allValue": null, - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(thanos_status{}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, { "auto": true, "auto_count": 300, diff --git a/examples/dashboards/query.json b/examples/dashboards/query.json index d6c06b9537f..01a9138422f 100644 --- a/examples/dashboards/query.json +++ b/examples/dashboards/query.json @@ -14,15 +14,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "error": "#E24D42", - "success": "#7EB26D" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -47,17 +39,38 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/1../", + "color": "#EAB839" + }, + { + "alias": "/2../", + "color": "#37872D" + }, + { + "alias": "/3../", + "color": "#E0B400" + }, + { + "alias": "/4../", + "color": "#1F60C4" + }, + { + "alias": "/5../", + "color": "#C4162A" + } + ], 
"spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(label_replace(rate(http_requests_total{namespace=\"$namespace\",job=~\"$job\",handler=\"query\"}[$interval]),\"status_code\", \"${1}xx\", \"code\", \"([0-9])..\")) by (job, handler, status_code)", + "expr": "sum by (job, handler, code) (rate(http_requests_total{job=\"$job\", handler=\"query\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{handler}} {{status_code}}", + "legendFormat": "{{job}} {{handler}} {{code}}", "refId": "A", "step": 10 } @@ -133,7 +146,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(http_requests_total{namespace=\"$namespace\",job=~\"$job\",handler=\"query\",code=~\"5..\"}[$interval])) / sum(rate(http_requests_total{namespace=\"$namespace\",job=~\"$job\",handler=\"query\"}[$interval]))", + "expr": "sum by (job) (rate(http_requests_total{job=\"$job\", handler=\"query\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{job=\"$job\", handler=\"query\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -203,34 +216,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",handler=\"query\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=\"$job\", handler=\"query\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 
{{job}}", + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, "refId": "A", "step": 10 }, { - "expr": "sum(rate(http_request_duration_seconds_sum{namespace=\"$namespace\",job=~\"$job\",handler=\"query\"}[$interval])) by (job) * 1 / sum(rate(http_request_duration_seconds_count{namespace=\"$namespace\",job=~\"$job\",handler=\"query\"}[$interval])) by (job)", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=\"$job\", handler=\"query\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}}", - "refId": "B", + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",handler=\"query\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=\"$job\", handler=\"query\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}}", - "refId": "C", + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -283,15 +324,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "error": "#E24D42", - "success": "#7EB26D" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -316,17 +349,38 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/1../", + "color": "#EAB839" + }, + { + "alias": "/2../", + "color": "#37872D" + }, + { + "alias": "/3../", + "color": "#E0B400" + }, + { + "alias": "/4../", + "color": "#1F60C4" + }, + { + "alias": "/5../", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, 
"steppedLine": false, "targets": [ { - "expr": "sum(label_replace(rate(http_requests_total{namespace=\"$namespace\",job=~\"$job\",handler=\"query_range\"}[$interval]),\"status_code\", \"${1}xx\", \"code\", \"([0-9])..\")) by (job, handler, status_code)", + "expr": "sum by (job, handler, code) (rate(http_requests_total{job=\"$job\", handler=\"query_range\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{handler}} {{status_code}}", + "legendFormat": "{{job}} {{handler}} {{code}}", "refId": "A", "step": 10 } @@ -402,7 +456,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(http_requests_total{namespace=\"$namespace\",job=~\"$job\",handler=\"query_range\",code=~\"5..\"}[$interval])) / sum(rate(http_requests_total{namespace=\"$namespace\",job=~\"$job\",handler=\"query_range\"}[$interval]))", + "expr": "sum by (job) (rate(http_requests_total{job=\"$job\", handler=\"query_range\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{job=\"$job\", handler=\"query_range\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -472,34 +526,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",handler=\"query_range\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=\"$job\", handler=\"query_range\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 
{{job}}", + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, "refId": "A", "step": 10 }, { - "expr": "sum(rate(http_request_duration_seconds_sum{namespace=\"$namespace\",job=~\"$job\",handler=\"query_range\"}[$interval])) by (job) * 1 / sum(rate(http_request_duration_seconds_count{namespace=\"$namespace\",job=~\"$job\",handler=\"query_range\"}[$interval])) by (job)", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=\"$job\", handler=\"query_range\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}}", - "refId": "B", + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",handler=\"query_range\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=\"$job\", handler=\"query_range\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}}", - "refId": "C", + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -552,26 +634,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - "AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - "Unauthenticated": "#EF843C", - "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -596,14 +659,87 @@ "pointradius": 5, 
"points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": "/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", + "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", + "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + "color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + { + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + "color": "#C4162A" + }, + { + "alias": "/Unknown/", + "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_client_handled_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, grpc_method, grpc_code) (rate(grpc_client_handled_total{job=\"$job\", grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -682,7 +818,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_client_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) / sum(rate(grpc_client_started_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval]))", + "expr": "sum by (job) 
(rate(grpc_client_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=\"$job\", grpc_type=\"unary\"}[$interval])) / sum by (job) (rate(grpc_client_handled_total{job=\"$job\", grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -752,34 +888,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(grpc_client_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(grpc_client_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "sum(rate(grpc_client_handling_seconds_sum{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job) * 1\n/\nsum(rate(grpc_client_handling_seconds_count{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job)\n", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(grpc_client_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - 
"expr": "histogram_quantile(0.50, sum(rate(grpc_client_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(grpc_client_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -832,26 +996,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - "AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - "Unauthenticated": "#EF843C", - "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -876,14 +1021,87 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": "/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", + "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", + "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + "color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + 
{ + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + "color": "#C4162A" + }, + { + "alias": "/Unknown/", + "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_client_handled_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, grpc_method, grpc_code) (rate(grpc_client_handled_total{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -962,7 +1180,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_client_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) / sum(rate(grpc_client_started_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval]))", + "expr": "sum by (job) (rate(grpc_client_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=\"$job\", grpc_type=\"server_stream\"}[$interval])) / sum by (job) (rate(grpc_client_handled_total{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1032,34 +1250,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], 
"spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(grpc_client_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(grpc_client_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "sum(rate(grpc_client_handling_seconds_sum{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job) * 1\n/\nsum(rate(grpc_client_handling_seconds_count{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job)\n", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(grpc_client_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(grpc_client_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(grpc_client_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -1144,7 +1390,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(thanos_query_store_apis_dns_lookups_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) (rate(thanos_query_store_apis_dns_lookups_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "lookups {{job}}", @@ -1223,7 +1469,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_query_store_apis_dns_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_query_store_apis_dns_lookups_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_query_store_apis_dns_failures_total{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_query_store_apis_dns_lookups_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1311,7 +1557,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_alloc_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc all {{instance}}", @@ -1319,7 +1565,7 @@ "step": 10 }, { - "expr": "go_memstats_heap_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_heap_alloc_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc heap {{instance}}", @@ -1327,7 +1573,7 @@ "step": 10 }, { - "expr": "rate(go_memstats_alloc_bytes_total{namespace=\"$namespace\",job=~\"$job\"}[30s])", + "expr": "rate(go_memstats_alloc_bytes_total{job=\"$job\"}[30s])", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc rate all {{instance}}", @@ -1335,7 +1581,7 @@ "step": 10 }, { - "expr": "rate(go_memstats_heap_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}[30s])", + "expr": "rate(go_memstats_heap_alloc_bytes{job=\"$job\"}[30s])", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc rate heap {{instance}}", @@ -1343,18 +1589,18 @@ 
"step": 10 }, { - "expr": "go_memstats_stack_inuse_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_stack_inuse_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inuse stack {{instance}}", + "legendFormat": "inuse stack {{instance}}", "legendLink": null, "step": 10 }, { - "expr": "go_memstats_heap_inuse_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_heap_inuse_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inuse heap {{instance}}", + "legendFormat": "inuse heap {{instance}}", "legendLink": null, "step": 10 } @@ -1427,7 +1673,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_goroutines{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_goroutines{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -1503,7 +1749,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_gc_duration_seconds{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_gc_duration_seconds{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{quantile}} {{instance}}", @@ -1565,8 +1811,8 @@ "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -1578,27 +1824,22 @@ "type": "datasource" }, { - "allValue": null, - "current": { }, - "datasource": "$datasource", + "auto": true, + "auto_count": 300, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(thanos_status{}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "label": "interval", + "name": "interval", + "query": "5m,10m,30m,1h,6h,12h", + "refresh": 2, + "type": "interval" }, { - 
"allValue": "thanos-query.*", + "allValue": null, "current": { "text": "all", "value": "$__all" @@ -1610,7 +1851,7 @@ "multi": false, "name": "job", "options": [ ], - "query": "label_values(up{namespace=\"$namespace\", job=~\"thanos-query.*\"}, job)", + "query": "label_values(up{job=~\"thanos-query.*\"}, job)", "refresh": 1, "regex": "", "sort": 2, @@ -1619,21 +1860,6 @@ "tagsQuery": "", "type": "query", "useTags": false - }, - { - "auto": true, - "auto_count": 300, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "hide": 0, - "label": "interval", - "name": "interval", - "query": "5m,10m,30m,1h,6h,12h", - "refresh": 2, - "type": "interval" } ] }, diff --git a/examples/dashboards/receive.json b/examples/dashboards/receive.json index 279b153617d..de0addf3c34 100644 --- a/examples/dashboards/receive.json +++ b/examples/dashboards/receive.json @@ -14,15 +14,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "error": "#E24D42", - "success": "#7EB26D" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -47,17 +39,38 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/1../", + "color": "#EAB839" + }, + { + "alias": "/2../", + "color": "#37872D" + }, + { + "alias": "/3../", + "color": "#E0B400" + }, + { + "alias": "/4../", + "color": "#1F60C4" + }, + { + "alias": "/5../", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(label_replace(rate(http_requests_total{handler=\"receive\",namespace=\"$namespace\",job=~\"$job\"}[$interval]),\"status_code\", \"${1}xx\", \"code\", \"([0-9])..\")) by (job, handler, status_code)", + "expr": "sum by (job, handler, code) (rate(http_requests_total{job=\"$job\", handler=\"receive\"}[$interval]))", "format": "time_series", 
"intervalFactor": 2, - "legendFormat": "{{job}} {{handler}} {{status_code}}", + "legendFormat": "{{job}} {{handler}} {{code}}", "refId": "A", "step": 10 } @@ -133,7 +146,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(http_requests_total{handler=\"receive\",namespace=\"$namespace\",job=~\"$job\",code=~\"5..\"}[$interval])) / sum(rate(http_requests_total{handler=\"receive\",namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(http_requests_total{job=\"$job\", handler=\"receive\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{job=\"$job\", handler=\"receive\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -203,34 +216,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{handler=\"receive\",namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=\"$job\", handler=\"receive\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}}", + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, "refId": "A", "step": 10 }, { - "expr": "sum(rate(http_request_duration_seconds_sum{handler=\"receive\",namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) * 1 / sum(rate(http_request_duration_seconds_count{handler=\"receive\",namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": 
"histogram_quantile(0.90, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=\"$job\", handler=\"receive\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}}", - "refId": "B", + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket{handler=\"receive\",namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=\"$job\", handler=\"receive\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}}", - "refId": "C", + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -315,7 +356,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_receive_replications_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) (rate(thanos_receive_replications_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "all {{job}}", @@ -394,7 +435,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_receive_replications_total{namespace=\"$namespace\",job=~\"$job\",result=\"error\"}[$interval])) / sum(rate(thanos_receive_replications_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_receive_replications_total{job=\"$job\", result=\"error\"}[$interval])) / sum by (job) (rate(thanos_receive_replications_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -483,7 +524,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_receive_forward_requests_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) 
(rate(thanos_receive_forward_requests_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "all {{job}}", @@ -562,7 +603,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_receive_forward_requests_total{namespace=\"$namespace\",job=~\"$job\",result=\"error\"}[$interval])) / sum(rate(thanos_receive_forward_requests_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_receive_forward_requests_total{job=\"$job\", result=\"error\"}[$interval])) / sum by (job) (rate(thanos_receive_forward_requests_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -619,26 +660,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - "AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - "Unauthenticated": "#EF843C", - "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -663,14 +685,87 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": "/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", + "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", 
+ "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + "color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + { + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + "color": "#C4162A" + }, + { + "alias": "/Unknown/", + "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method=\"RemoteWrite\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, grpc_method, grpc_code) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"unary\", grpc_method=\"RemoteWrite\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -749,7 +844,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method=\"RemoteWrite\"}[$interval])) / sum(rate(grpc_server_started_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method=\"RemoteWrite\"}[$interval]))", + "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=\"$job\", grpc_type=\"unary\", grpc_method=\"RemoteWrite\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"unary\", grpc_method=\"RemoteWrite\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -819,34 +914,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + 
"seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method=\"RemoteWrite\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\", grpc_method=\"RemoteWrite\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "sum(rate(grpc_server_handling_seconds_sum{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method=\"RemoteWrite\"}[$interval])) by (job) * 1\n/\nsum(rate(grpc_server_handling_seconds_count{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method=\"RemoteWrite\"}[$interval])) by (job)\n", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\", grpc_method=\"RemoteWrite\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method=\"RemoteWrite\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) 
(rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\", grpc_method=\"RemoteWrite\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -899,26 +1022,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - "AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - "Unauthenticated": "#EF843C", - "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -943,14 +1047,87 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": "/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", + "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", + "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + "color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + { + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + 
"color": "#C4162A" + }, + { + "alias": "/Unknown/", + "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method!=\"RemoteWrite\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, grpc_method, grpc_code) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"unary\", grpc_method!=\"RemoteWrite\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -1029,7 +1206,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method!=\"RemoteWrite\"}[$interval])) / sum(rate(grpc_server_started_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method!=\"RemoteWrite\"}[$interval]))", + "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=\"$job\", grpc_type=\"unary\", grpc_method!=\"RemoteWrite\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"unary\", grpc_method!=\"RemoteWrite\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1099,34 +1276,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - 
"expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method!=\"RemoteWrite\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\", grpc_method!=\"RemoteWrite\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "sum(rate(grpc_server_handling_seconds_sum{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method!=\"RemoteWrite\"}[$interval])) by (job) * 1\n/\nsum(rate(grpc_server_handling_seconds_count{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method!=\"RemoteWrite\"}[$interval])) by (job)\n", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\", grpc_method!=\"RemoteWrite\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\",grpc_method!=\"RemoteWrite\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\", grpc_method!=\"RemoteWrite\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ 
-1179,26 +1384,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - "AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - "Unauthenticated": "#EF843C", - "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -1223,14 +1409,87 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": "/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", + "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", + "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + "color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + { + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + "color": "#C4162A" + }, + { + "alias": "/Unknown/", + "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(grpc_server_handled_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, grpc_method, grpc_code) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -1309,7 +1568,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) / sum(rate(grpc_server_started_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval]))", + "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=\"$job\", grpc_type=\"server_stream\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1379,34 +1638,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - 
"legendFormat": "P99 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "sum(rate(grpc_server_handling_seconds_sum{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job) * 1\n/\nsum(rate(grpc_server_handling_seconds_count{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job)\n", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -1503,6 +1790,7 @@ "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value", @@ -1524,7 +1812,7 @@ ], "targets": [ { - "expr": "time() - max(thanos_objstore_bucket_last_successful_upload_time{namespace=\"$namespace\",job=~\"$job\"}) by (job, bucket)", + "expr": "time() - max by (job, bucket) (thanos_objstore_bucket_last_successful_upload_time{job=\"$job\"})", "format": "table", "instant": true, "intervalFactor": 2, @@ -1614,7 +1902,7 @@ "steppedLine": 
false, "targets": [ { - "expr": "go_memstats_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_alloc_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc all {{instance}}", @@ -1622,7 +1910,7 @@ "step": 10 }, { - "expr": "go_memstats_heap_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_heap_alloc_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc heap {{instance}}", @@ -1630,7 +1918,7 @@ "step": 10 }, { - "expr": "rate(go_memstats_alloc_bytes_total{namespace=\"$namespace\",job=~\"$job\"}[30s])", + "expr": "rate(go_memstats_alloc_bytes_total{job=\"$job\"}[30s])", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc rate all {{instance}}", @@ -1638,7 +1926,7 @@ "step": 10 }, { - "expr": "rate(go_memstats_heap_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}[30s])", + "expr": "rate(go_memstats_heap_alloc_bytes{job=\"$job\"}[30s])", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc rate heap {{instance}}", @@ -1646,18 +1934,18 @@ "step": 10 }, { - "expr": "go_memstats_stack_inuse_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_stack_inuse_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inuse stack {{instance}}", + "legendFormat": "inuse stack {{instance}}", "legendLink": null, "step": 10 }, { - "expr": "go_memstats_heap_inuse_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_heap_inuse_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inuse heap {{instance}}", + "legendFormat": "inuse heap {{instance}}", "legendLink": null, "step": 10 } @@ -1730,7 +2018,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_goroutines{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_goroutines{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ 
-1806,7 +2094,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_gc_duration_seconds{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_gc_duration_seconds{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{quantile}} {{instance}}", @@ -1868,8 +2156,8 @@ "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -1881,27 +2169,22 @@ "type": "datasource" }, { - "allValue": null, - "current": { }, - "datasource": "$datasource", + "auto": true, + "auto_count": 300, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(thanos_status{}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "label": "interval", + "name": "interval", + "query": "5m,10m,30m,1h,6h,12h", + "refresh": 2, + "type": "interval" }, { - "allValue": "thanos-receive.*", + "allValue": null, "current": { "text": "all", "value": "$__all" @@ -1913,7 +2196,7 @@ "multi": false, "name": "job", "options": [ ], - "query": "label_values(up{namespace=\"$namespace\", job=~\"thanos-receive.*\"}, job)", + "query": "label_values(up{job=~\"thanos-receive.*\"}, job)", "refresh": 1, "regex": "", "sort": 2, @@ -1922,21 +2205,6 @@ "tagsQuery": "", "type": "query", "useTags": false - }, - { - "auto": true, - "auto_count": 300, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "hide": 0, - "label": "interval", - "name": "interval", - "query": "5m,10m,30m,1h,6h,12h", - "refresh": 2, - "type": "interval" } ] }, diff --git a/examples/dashboards/rule.json b/examples/dashboards/rule.json index faea9dc787e..c0120c20bdc 100644 --- a/examples/dashboards/rule.json +++ b/examples/dashboards/rule.json @@ -45,7 +45,7 @@ 
"steppedLine": false, "targets": [ { - "expr": "sum by (strategy) (rate(prometheus_rule_evaluations_total{namespace=\"$namespace\",job=\"$job\"}[$interval]))\n", + "expr": "sum by (job, strategy) (rate(prometheus_rule_evaluations_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ strategy }}", @@ -121,7 +121,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (strategy) (increase(prometheus_rule_group_iterations_missed_total{namespace=\"$namespace\",job=\"$job\"}[$interval]))\n", + "expr": "sum by (job, strategy) (increase(prometheus_rule_group_iterations_missed_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ strategy }}", @@ -197,7 +197,7 @@ "steppedLine": false, "targets": [ { - "expr": "(\n max by(rule_group) (prometheus_rule_group_last_duration_seconds{namespace=\"$namespace\",job=\"$job\"})\n >\n sum by(rule_group) (prometheus_rule_group_interval_seconds{namespace=\"$namespace\",job=\"$job\"})\n)\n", + "expr": "(\n max by(job, rule_group) (prometheus_rule_group_last_duration_seconds{job=\"$job\"})\n >\n sum by(job, rule_group) (prometheus_rule_group_interval_seconds{job=\"$job\"})\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ rule_group }}", @@ -286,7 +286,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_alert_sender_alerts_dropped_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, alertmanager)", + "expr": "sum by (job, alertmanager) (rate(thanos_alert_sender_alerts_dropped_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{alertmanager}}", @@ -363,7 +363,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_alert_sender_alerts_sent_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, alertmanager)", + "expr": "sum by (job, alertmanager) (rate(thanos_alert_sender_alerts_sent_total{job=\"$job\"}[$interval]))", 
"format": "time_series", "intervalFactor": 2, "legendFormat": "{{alertmanager}}", @@ -442,7 +442,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_alert_sender_errors_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_alert_sender_alerts_sent_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_alert_sender_errors_total{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_alert_sender_alerts_sent_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -512,34 +512,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 3, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_alert_sender_latency_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(thanos_alert_sender_latency_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}}", + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, "refId": "A", "step": 10 }, { - "expr": "sum(rate(thanos_alert_sender_latency_seconds_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) * 1 / sum(rate(thanos_alert_sender_latency_seconds_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(thanos_alert_sender_latency_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean 
{{job}}", - "refId": "B", + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_alert_sender_latency_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_alert_sender_latency_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}}", - "refId": "C", + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -624,7 +652,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_alert_queue_alerts_dropped_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) (rate(thanos_alert_queue_alerts_dropped_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}}", @@ -703,7 +731,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_alert_queue_alerts_dropped_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_alert_queue_alerts_pushed_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_alert_queue_alerts_dropped_total{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_alert_queue_alerts_pushed_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -760,26 +788,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - "AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - "Unauthenticated": "#EF843C", 
- "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -804,14 +813,87 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": "/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", + "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", + "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + "color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + { + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + "color": "#C4162A" + }, + { + "alias": "/Unknown/", + "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, grpc_method, grpc_code) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -890,7 +972,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) / sum(rate(grpc_server_started_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval]))", + "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=\"$job\", grpc_type=\"unary\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -960,34 +1042,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "sum(rate(grpc_server_handling_seconds_sum{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job) * 1\n/\nsum(rate(grpc_server_handling_seconds_count{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job)\n", + "expr": "histogram_quantile(0.90, sum by (job, le) 
(rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -1040,26 +1150,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - "AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - "Unauthenticated": "#EF843C", - "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -1084,14 +1175,87 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": 
"/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", + "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", + "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + "color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + { + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + "color": "#C4162A" + }, + { + "alias": "/Unknown/", + "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, grpc_method, grpc_code) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -1170,7 +1334,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) / sum(rate(grpc_server_started_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval]))", + "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=\"$job\", grpc_type=\"server_stream\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1240,34 +1404,62 @@ "pointradius": 5, 
"points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "sum(rate(grpc_server_handling_seconds_sum{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job) * 1\n/\nsum(rate(grpc_server_handling_seconds_count{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job)\n", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", 
grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -1351,7 +1543,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_alloc_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc all {{instance}}", @@ -1359,7 +1551,7 @@ "step": 10 }, { - "expr": "go_memstats_heap_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_heap_alloc_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc heap {{instance}}", @@ -1367,7 +1559,7 @@ "step": 10 }, { - "expr": "rate(go_memstats_alloc_bytes_total{namespace=\"$namespace\",job=~\"$job\"}[30s])", + "expr": "rate(go_memstats_alloc_bytes_total{job=\"$job\"}[30s])", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc rate all {{instance}}", @@ -1375,7 +1567,7 @@ "step": 10 }, { - "expr": "rate(go_memstats_heap_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}[30s])", + "expr": "rate(go_memstats_heap_alloc_bytes{job=\"$job\"}[30s])", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc rate heap {{instance}}", @@ -1383,18 +1575,18 @@ "step": 10 }, { - "expr": "go_memstats_stack_inuse_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_stack_inuse_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inuse stack {{instance}}", + "legendFormat": "inuse stack {{instance}}", "legendLink": null, "step": 10 }, { - "expr": "go_memstats_heap_inuse_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_heap_inuse_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inuse heap {{instance}}", + 
"legendFormat": "inuse heap {{instance}}", "legendLink": null, "step": 10 } @@ -1467,7 +1659,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_goroutines{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_goroutines{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -1543,7 +1735,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_gc_duration_seconds{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_gc_duration_seconds{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{quantile}} {{instance}}", @@ -1605,8 +1797,8 @@ "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -1618,27 +1810,22 @@ "type": "datasource" }, { - "allValue": null, - "current": { }, - "datasource": "$datasource", + "auto": true, + "auto_count": 300, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(thanos_status{}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "label": "interval", + "name": "interval", + "query": "5m,10m,30m,1h,6h,12h", + "refresh": 2, + "type": "interval" }, { - "allValue": "thanos-rule.*", + "allValue": null, "current": { "text": "all", "value": "$__all" }, @@ -1650,7 +1837,7 @@ "multi": false, "name": "job", "options": [ ], - "query": "label_values(up{namespace=\"$namespace\", job=~\"thanos-rule.*\"}, job)", + "query": "label_values(up{job=~\"thanos-rule.*\"}, job)", "refresh": 1, "regex": "", "sort": 2, @@ -1659,21 +1846,6 @@ "tagsQuery": "", "type": "query", "useTags": false - }, - { - "auto": true, - "auto_count": 300, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "hide": 0, - "label": 
"interval", - "name": "interval", - "query": "5m,10m,30m,1h,6h,12h", - "refresh": 2, - "type": "interval" } ] }, diff --git a/examples/dashboards/sidecar.json b/examples/dashboards/sidecar.json index 33e47f0902d..f4512e75a10 100644 --- a/examples/dashboards/sidecar.json +++ b/examples/dashboards/sidecar.json @@ -14,26 +14,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - "AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - "Unauthenticated": "#EF843C", - "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -58,14 +39,87 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": "/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", + "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", + "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + "color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + { + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + "color": "#C4162A" + }, + { + "alias": "/Unknown/", 
+ "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, grpc_method, grpc_code) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -144,7 +198,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) / sum(rate(grpc_server_started_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval]))", + "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=\"$job\", grpc_type=\"unary\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -214,34 +268,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) 
(rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "sum(rate(grpc_server_handling_seconds_sum{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job) * 1\n/\nsum(rate(grpc_server_handling_seconds_count{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job)\n", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -294,26 +376,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - "AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - 
"Unauthenticated": "#EF843C", - "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -338,14 +401,87 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": "/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", + "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", + "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + "color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + { + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + "color": "#C4162A" + }, + { + "alias": "/Unknown/", + "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, grpc_method, grpc_code) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -423,7 +559,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) / sum(rate(grpc_server_started_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval]))", + "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=\"$job\", grpc_type=\"server_stream\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -493,34 +629,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "sum(rate(grpc_server_handling_seconds_sum{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job) * 1\n/\nsum(rate(grpc_server_handling_seconds_count{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job)\n", + 
"expr": "histogram_quantile(0.90, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -617,6 +781,7 @@ "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value", @@ -638,7 +803,7 @@ ], "targets": [ { - "expr": "time() - max(thanos_objstore_bucket_last_successful_upload_time{namespace=\"$namespace\",job=~\"$job\"}) by (job, bucket)", + "expr": "time() - max by (job, bucket) (thanos_objstore_bucket_last_successful_upload_time{job=\"$job\"})", "format": "table", "instant": true, "intervalFactor": 2, @@ -728,7 +893,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_objstore_bucket_operations_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, operation)", + "expr": "sum by (job, operation) (rate(thanos_objstore_bucket_operations_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{operation}}", @@ -806,7 +971,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(thanos_objstore_bucket_operation_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_objstore_bucket_operations_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_objstore_bucket_operations_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -875,34 +1040,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}}", + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, "refId": "A", "step": 10 }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) * 1 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}}", - "refId": "B", + "legendFormat": "p90 
{{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}}", - "refId": "C", + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -986,7 +1179,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_alloc_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc all {{instance}}", @@ -994,7 +1187,7 @@ "step": 10 }, { - "expr": "go_memstats_heap_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_heap_alloc_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc heap {{instance}}", @@ -1002,7 +1195,7 @@ "step": 10 }, { - "expr": "rate(go_memstats_alloc_bytes_total{namespace=\"$namespace\",job=~\"$job\"}[30s])", + "expr": "rate(go_memstats_alloc_bytes_total{job=\"$job\"}[30s])", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc rate all {{instance}}", @@ -1010,7 +1203,7 @@ "step": 10 }, { - "expr": "rate(go_memstats_heap_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}[30s])", + "expr": "rate(go_memstats_heap_alloc_bytes{job=\"$job\"}[30s])", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc rate heap {{instance}}", @@ -1018,18 +1211,18 @@ "step": 10 }, { - "expr": "go_memstats_stack_inuse_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_stack_inuse_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inuse 
stack {{instance}}", + "legendFormat": "inuse stack {{instance}}", "legendLink": null, "step": 10 }, { - "expr": "go_memstats_heap_inuse_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_heap_inuse_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inuse heap {{instance}}", + "legendFormat": "inuse heap {{instance}}", "legendLink": null, "step": 10 } @@ -1102,7 +1295,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_goroutines{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_goroutines{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -1178,7 +1371,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_gc_duration_seconds{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_gc_duration_seconds{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{quantile}} {{instance}}", @@ -1240,8 +1433,8 @@ "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -1253,27 +1446,22 @@ "type": "datasource" }, { - "allValue": null, - "current": { }, - "datasource": "$datasource", + "auto": true, + "auto_count": 300, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(thanos_status{}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "label": "interval", + "name": "interval", + "query": "5m,10m,30m,1h,6h,12h", + "refresh": 2, + "type": "interval" }, { - "allValue": "thanos-sidecar.*", + "allValue": null, "current": { "text": "all", "value": "$__all" }, @@ -1285,7 +1473,7 @@ "multi": false, "name": "job", "options": [ ], - "query": "label_values(up{namespace=\"$namespace\", 
job=~\"thanos-sidecar.*\"}, job)", + "query": "label_values(up{job=~\"thanos-sidecar.*\"}, job)", "refresh": 1, "regex": "", "sort": 2, @@ -1294,21 +1482,6 @@ "tagsQuery": "", "type": "query", "useTags": false - }, - { - "auto": true, - "auto_count": 300, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "hide": 0, - "label": "interval", - "name": "interval", - "query": "5m,10m,30m,1h,6h,12h", - "refresh": 2, - "type": "interval" } ] }, diff --git a/examples/dashboards/store.json b/examples/dashboards/store.json index 0e9710342d6..71e70a9022f 100644 --- a/examples/dashboards/store.json +++ b/examples/dashboards/store.json @@ -14,26 +14,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - "AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - "Unauthenticated": "#EF843C", - "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -58,14 +39,87 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": "/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", + "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", + "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + 
"color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + { + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + "color": "#C4162A" + }, + { + "alias": "/Unknown/", + "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, grpc_method, grpc_code) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -144,7 +198,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) / sum(rate(grpc_server_started_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval]))", + "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=\"$job\", grpc_type=\"unary\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -214,34 +268,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + 
"fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "sum(rate(grpc_server_handling_seconds_sum{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job) * 1\n/\nsum(rate(grpc_server_handling_seconds_count{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job)\n", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"unary\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"unary\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -294,26 +376,7 @@ "height": "250px", "panels": [ { - "aliasColors": { - "Aborted": "#EAB839", - 
"AlreadyExists": "#7EB26D", - "Canceled": "#E24D42", - "DataLoss": "#E24D42", - "DeadlineExceeded": "#E24D42", - "FailedPrecondition": "#6ED0E0", - "Internal": "#E24D42", - "InvalidArgument": "#EF843C", - "NotFound": "#EF843C", - "OK": "#7EB26D", - "OutOfRange": "#E24D42", - "PermissionDenied": "#EF843C", - "ResourceExhausted": "#E24D42", - "Unauthenticated": "#EF843C", - "Unavailable": "#E24D42", - "Unimplemented": "#6ED0E0", - "Unknown": "#E24D42", - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -338,14 +401,87 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "/Aborted/", + "color": "#EAB839" + }, + { + "alias": "/AlreadyExists/", + "color": "#37872D" + }, + { + "alias": "/FailedPrecondition/", + "color": "#E0B400" + }, + { + "alias": "/Unimplemented/", + "color": "#E0B400" + }, + { + "alias": "/InvalidArgument/", + "color": "#1F60C4" + }, + { + "alias": "/NotFound/", + "color": "#1F60C4" + }, + { + "alias": "/PermissionDenied/", + "color": "#1F60C4" + }, + { + "alias": "/Unauthenticated/", + "color": "#1F60C4" + }, + { + "alias": "/Canceled/", + "color": "#C4162A" + }, + { + "alias": "/DataLoss/", + "color": "#C4162A" + }, + { + "alias": "/DeadlineExceeded/", + "color": "#C4162A" + }, + { + "alias": "/Internal/", + "color": "#C4162A" + }, + { + "alias": "/OutOfRange/", + "color": "#C4162A" + }, + { + "alias": "/ResourceExhausted/", + "color": "#C4162A" + }, + { + "alias": "/Unavailable/", + "color": "#C4162A" + }, + { + "alias": "/Unknown/", + "color": "#C4162A" + }, + { + "alias": "/OK/", + "color": "#37872D" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], "spaceLength": 10, "span": 4, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, grpc_code)", + "expr": "sum by (job, 
grpc_method, grpc_code) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{grpc_method}} {{grpc_code}}", @@ -424,7 +560,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) / sum(rate(grpc_server_started_total{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval]))", + "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=\"$job\", grpc_type=\"server_stream\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -494,34 +630,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - 
"expr": "sum(rate(grpc_server_handling_seconds_sum{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job) * 1\n/\nsum(rate(grpc_server_handling_seconds_count{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job)\n", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(grpc_server_handling_seconds_bucket{namespace=\"$namespace\",job=~\"$job\",grpc_type=\"server_stream\"}[$interval])) by (job, grpc_method, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(grpc_server_handling_seconds_bucket{job=\"$job\", grpc_type=\"server_stream\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}} {{grpc_method}}", - "legendLink": null, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -606,7 +770,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_objstore_bucket_operations_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, operation)", + "expr": "sum by (job, operation) (rate(thanos_objstore_bucket_operations_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{operation}}", @@ -683,7 +847,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job, operation) (rate(thanos_objstore_bucket_operation_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum by (job, operation) (rate(thanos_objstore_bucket_operations_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job, 
operation) (rate(thanos_objstore_bucket_operation_failures_total{job=\"$job\"}[$interval])) / sum by (job, operation) (rate(thanos_objstore_bucket_operations_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{operation}}", @@ -760,7 +924,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, operation, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, operation, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, "legendFormat": "P99 {{job}}", @@ -768,7 +932,7 @@ "step": 10 }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, operation) * 1 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, operation)", + "expr": "sum by (job, operation) (rate(thanos_objstore_bucket_operation_duration_seconds_sum{job=\"$job\"}[$interval])) * 1 / sum by (job, operation) (rate(thanos_objstore_bucket_operation_duration_seconds_count{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "mean {{job}}", @@ -776,7 +940,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, operation, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, operation, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, "legendFormat": "P50 {{job}}", @@ -865,7 +1029,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(thanos_bucket_store_block_loads_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_bucket_store_block_loads_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "block loads", @@ -944,7 +1108,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_bucket_store_block_load_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_bucket_store_block_loads_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_bucket_store_block_load_failures_total{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_bucket_store_block_loads_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1021,7 +1185,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_bucket_store_block_drops_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, operation)", + "expr": "sum by (job, operation) (rate(thanos_bucket_store_block_drops_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "block drops {{job}}", @@ -1100,7 +1264,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_bucket_store_block_drop_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_bucket_store_block_drops_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job) (rate(thanos_bucket_store_block_drop_failures_total{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_bucket_store_block_drops_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "error", @@ -1189,7 +1353,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_store_index_cache_requests_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, item_type)", + "expr": "sum by (job, item_type) 
(rate(thanos_store_index_cache_requests_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{item_type}}", @@ -1266,7 +1430,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_store_index_cache_hits_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, item_type)", + "expr": "sum by (job, item_type) (rate(thanos_store_index_cache_hits_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{item_type}}", @@ -1343,7 +1507,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_store_index_cache_items_added_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, item_type)", + "expr": "sum by (job, item_type) (rate(thanos_store_index_cache_items_added_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{item_type}}", @@ -1420,7 +1584,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_store_index_cache_items_evicted_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, item_type)", + "expr": "sum by (job, item_type) (rate(thanos_store_index_cache_items_evicted_total{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}} {{item_type}}", @@ -1509,7 +1673,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_bucket_store_sent_chunk_size_bytes_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le))", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_bucket_store_sent_chunk_size_bytes_bucket{job=\"$job\"}[$interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "P99", @@ -1517,7 +1681,7 @@ "step": 10 }, { - "expr": "sum(rate(thanos_bucket_store_sent_chunk_size_bytes_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) / 
sum(rate(thanos_bucket_store_sent_chunk_size_bytes_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) (rate(thanos_bucket_store_sent_chunk_size_bytes_sum{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_bucket_store_sent_chunk_size_bytes_count{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "mean", @@ -1525,7 +1689,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.99, sum(rate(thanos_bucket_store_sent_chunk_size_bytes_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le))", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_bucket_store_sent_chunk_size_bytes_bucket{job=\"$job\"}[$interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "P50", @@ -1613,7 +1777,7 @@ "steppedLine": false, "targets": [ { - "expr": "thanos_bucket_store_series_blocks_queried{namespace=\"$namespace\",job=~\"$job\",quantile=\"0.99\"}", + "expr": "thanos_bucket_store_series_blocks_queried{job=\"$job\", quantile=\"0.99\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "P99", @@ -1621,7 +1785,7 @@ "step": 10 }, { - "expr": "sum(rate(thanos_bucket_store_series_blocks_queried_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) / sum(rate(thanos_bucket_store_series_blocks_queried_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) (rate(thanos_bucket_store_series_blocks_queried_sum{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_bucket_store_series_blocks_queried_count{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "mean {{job}}", @@ -1629,7 +1793,7 @@ "step": 10 }, { - "expr": "thanos_bucket_store_series_blocks_queried{namespace=\"$namespace\",job=~\"$job\",quantile=\"0.50\"}", + "expr": "thanos_bucket_store_series_blocks_queried{job=\"$job\", quantile=\"0.50\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": 
"P50", @@ -1706,7 +1870,7 @@ "steppedLine": false, "targets": [ { - "expr": "thanos_bucket_store_series_data_fetched{namespace=\"$namespace\",job=~\"$job\",quantile=\"0.99\"}", + "expr": "thanos_bucket_store_series_data_fetched{job=\"$job\", quantile=\"0.99\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "P99", @@ -1714,7 +1878,7 @@ "step": 10 }, { - "expr": "sum(rate(thanos_bucket_store_series_data_fetched_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) / sum(rate(thanos_bucket_store_series_data_fetched_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) (rate(thanos_bucket_store_series_data_fetched_sum{job=\"$job\"}[$interval])) / sum by (job) (rate(thanos_bucket_store_series_data_fetched_count{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "mean {{job}}", @@ -1722,7 +1886,7 @@ "step": 10 }, { - "expr": "thanos_bucket_store_series_data_fetched{namespace=\"$namespace\",job=~\"$job\",quantile=\"0.50\"}", + "expr": "thanos_bucket_store_series_data_fetched{job=\"$job\", quantile=\"0.50\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "P50", @@ -1798,7 +1962,7 @@ "steppedLine": false, "targets": [ { - "expr": "thanos_bucket_store_series_result_series{namespace=\"$namespace\",job=~\"$job\",quantile=\"0.99\"}", + "expr": "thanos_bucket_store_series_result_series{job=\"$job\",quantile=\"0.99\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "P99", @@ -1806,7 +1970,7 @@ "step": 10 }, { - "expr": "sum(rate(thanos_bucket_store_series_result_series_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) / sum(rate(thanos_bucket_store_series_result_series_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum by (job) (rate(thanos_bucket_store_series_result_series_sum{job=\"$job\"}[$interval])) / sum by (job) 
(rate(thanos_bucket_store_series_result_series_count{job=\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "mean {{job}}", @@ -1814,7 +1978,7 @@ "step": 10 }, { - "expr": "thanos_bucket_store_series_result_series{namespace=\"$namespace\",job=~\"$job\",quantile=\"0.50\"}", + "expr": "thanos_bucket_store_series_result_series{job=\"$job\",quantile=\"0.50\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "P50", @@ -1896,34 +2060,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_bucket_store_series_get_all_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(thanos_bucket_store_series_get_all_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}}", + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, "refId": "A", "step": 10 }, { - "expr": "sum(rate(thanos_bucket_store_series_get_all_duration_seconds_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) * 1 / sum(rate(thanos_bucket_store_series_get_all_duration_seconds_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(thanos_bucket_store_series_get_all_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}}", - "refId": "B", + "legendFormat": "p90 
{{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_bucket_store_series_get_all_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_bucket_store_series_get_all_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}}", - "refId": "C", + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -1989,34 +2181,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_bucket_store_series_merge_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(thanos_bucket_store_series_merge_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}}", + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, "refId": "A", "step": 10 }, { - "expr": "sum(rate(thanos_bucket_store_series_merge_duration_seconds_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) * 1 / sum(rate(thanos_bucket_store_series_merge_duration_seconds_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "histogram_quantile(0.90, sum by (job, le) 
(rate(thanos_bucket_store_series_merge_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}}", - "refId": "B", + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_bucket_store_series_merge_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_bucket_store_series_merge_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}}", - "refId": "C", + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -2082,34 +2302,62 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], "spaceLength": 10, "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_bucket_store_series_gate_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P99 {{job}}", + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, "refId": "A", "step": 10 }, { - "expr": "sum(rate(thanos_bucket_store_series_gate_duration_seconds_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) * 1 / 
"sum(rate(thanos_bucket_store_series_gate_duration_seconds_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "mean {{job}}", - "refId": "B", + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_bucket_store_series_gate_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{job=\"$job\"}[$interval]))) * 1", "format": "time_series", "intervalFactor": 2, - "legendFormat": "P50 {{job}}", - "refId": "C", + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "refId": "A", "step": 10 } ], @@ -2193,7 +2441,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_alloc_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc all {{instance}}", @@ -2201,7 +2449,7 @@ "step": 10 }, { - "expr": "go_memstats_heap_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_heap_alloc_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc heap {{instance}}", @@ -2209,7 +2457,7 @@ "step": 10 }, { - "expr": "rate(go_memstats_alloc_bytes_total{namespace=\"$namespace\",job=~\"$job\"}[30s])", + "expr": "rate(go_memstats_alloc_bytes_total{job=\"$job\"}[30s])", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc rate all {{instance}}", @@ -2217,7 +2465,7 @@ "step": 10 }, { - "expr": "rate(go_memstats_heap_alloc_bytes{namespace=\"$namespace\",job=~\"$job\"}[30s])", + "expr": 
"rate(go_memstats_heap_alloc_bytes{job=\"$job\"}[30s])", "format": "time_series", "intervalFactor": 2, "legendFormat": "alloc rate heap {{instance}}", @@ -2225,18 +2473,18 @@ "step": 10 }, { - "expr": "go_memstats_stack_inuse_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_stack_inuse_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inuse stack {{instance}}", + "legendFormat": "inuse stack {{instance}}", "legendLink": null, "step": 10 }, { - "expr": "go_memstats_heap_inuse_bytes{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_memstats_heap_inuse_bytes{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "inuse heap {{instance}}", + "legendFormat": "inuse heap {{instance}}", "legendLink": null, "step": 10 } @@ -2309,7 +2557,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_goroutines{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_goroutines{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -2385,7 +2633,7 @@ "steppedLine": false, "targets": [ { - "expr": "go_gc_duration_seconds{namespace=\"$namespace\",job=~\"$job\"}", + "expr": "go_gc_duration_seconds{job=\"$job\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{quantile}} {{instance}}", @@ -2447,8 +2695,8 @@ "list": [ { "current": { - "text": "Prometheus", - "value": "Prometheus" + "text": "default", + "value": "default" }, "hide": 0, "label": null, @@ -2460,27 +2708,22 @@ "type": "datasource" }, { - "allValue": null, - "current": { }, - "datasource": "$datasource", + "auto": true, + "auto_count": 300, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(thanos_status{}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": 
"", - "type": "query", - "useTags": false + "label": "interval", + "name": "interval", + "query": "5m,10m,30m,1h,6h,12h", + "refresh": 2, + "type": "interval" }, { - "allValue": "thanos-store.*", + "allValue": null, "current": { "text": "all", "value": "$__all" @@ -2492,7 +2735,7 @@ "multi": false, "name": "job", "options": [ ], - "query": "label_values(up{namespace=\"$namespace\", job=~\"thanos-store.*\"}, job)", + "query": "label_values(up{job=~\"thanos-store.*\"}, job)", "refresh": 1, "regex": "", "sort": 2, @@ -2501,21 +2744,6 @@ "tagsQuery": "", "type": "query", "useTags": false - }, - { - "auto": true, - "auto_count": 300, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "hide": 0, - "label": "interval", - "name": "interval", - "query": "5m,10m,30m,1h,6h,12h", - "refresh": 2, - "type": "interval" } ] }, diff --git a/go.mod b/go.mod index 7f12c9aba0d..f4a67d28fea 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/cespare/xxhash v1.1.0 github.com/chromedp/cdproto v0.0.0-20200424080200-0de008e41fa0 github.com/chromedp/chromedp v0.5.3 - github.com/cortexproject/cortex v1.7.1-0.20210224085859-66d6fb5b0d42 + github.com/cortexproject/cortex v1.7.1-0.20210316085356-3fedc1108a49 github.com/davecgh/go-spew v1.1.1 github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/fatih/structtag v1.1.0 @@ -23,7 +23,7 @@ require ( github.com/gogo/protobuf v1.3.2 github.com/gogo/status v1.0.3 github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e - github.com/golang/snappy v0.0.3-0.20201103224600-674baa8c7fc3 + github.com/golang/snappy v0.0.3 github.com/googleapis/gax-go v2.0.2+incompatible github.com/grpc-ecosystem/go-grpc-middleware/providers/kit/v2 v2.0.0-20201002093600-73cf2ae9d891 github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2.0.20201207153454-9f6bf00c00a7 @@ -45,11 
+45,11 @@ require ( github.com/opentracing/opentracing-go v1.2.0 github.com/pkg/errors v0.9.1 github.com/pmezard/go-difflib v1.0.0 - github.com/prometheus/alertmanager v0.21.1-0.20201106142418-c39b78780054 + github.com/prometheus/alertmanager v0.21.1-0.20210310093010-0f9cab6991e6 github.com/prometheus/client_golang v1.9.0 github.com/prometheus/client_model v0.2.0 - github.com/prometheus/common v0.15.0 - github.com/prometheus/prometheus v1.8.2-0.20210215121130-6f488061dfb4 + github.com/prometheus/common v0.18.0 + github.com/prometheus/prometheus v1.8.2-0.20210315220929-1cba1741828b github.com/uber/jaeger-client-go v2.25.0+incompatible github.com/uber/jaeger-lib v2.4.0+incompatible github.com/weaveworks/common v0.0.0-20210112142934-23c8d7fa6120 @@ -78,7 +78,7 @@ replace ( // Update to v1.1.1 to make sure windows CI pass. github.com/elastic/go-sysinfo => github.com/elastic/go-sysinfo v1.1.1 // Make sure Prometheus version is pinned as Prometheus semver does not include Go APIs. 
- github.com/prometheus/prometheus => github.com/prometheus/prometheus v1.8.2-0.20210215121130-6f488061dfb4 + github.com/prometheus/prometheus => github.com/prometheus/prometheus v1.8.2-0.20210315220929-1cba1741828b github.com/sercand/kuberesolver => github.com/sercand/kuberesolver v2.4.0+incompatible google.golang.org/grpc => google.golang.org/grpc v1.29.1 diff --git a/go.sum b/go.sum index f1a2eec948a..490cda85e32 100644 --- a/go.sum +++ b/go.sum @@ -29,6 +29,7 @@ cloud.google.com/go/bigtable v1.1.0/go.mod h1:B6ByKcIdYmhoyDzmOnQxyOhN6r05qnewYI cloud.google.com/go/bigtable v1.2.0/go.mod h1:JcVAOl45lrTmQfLj7T6TxyMzIN/3FGGcFm+2xVAli2o= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= +cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= @@ -179,6 +180,7 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k= +github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod 
h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/bmizerany/pat v0.0.0-20170815010413-6226ea591a40/go.mod h1:8rLXio+WjiTceGBHIoTvn60HIbs7Hm7bcHjyrSqYB9c= @@ -190,6 +192,8 @@ github.com/cenkalti/backoff v1.0.0/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQ github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/cenkalti/backoff/v4 v4.0.2/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40 h1:xvUo53O5MRZhVMJAxWCJcS5HHrqAiAG9SJ1LpMu6aAI= +github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.0/go.mod h1:dgIUBU3pDso/gPgZ1osOZ0iQf77oPR28Tjxl5dIMyVM= @@ -210,21 +214,30 @@ github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGX github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= github.com/cockroachdb/cockroach-go v0.0.0-20181001143604-e0a95dfd547c/go.mod h1:XGLbWH/ujMcbPbhZq52Nv6UrCghb1yGn//133kEsvDk= github.com/cockroachdb/datadriven v0.0.0-20190531201743-edce55837238/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= -github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa h1:OaNxuTZr7kxeODyLWsRMC+OD03aFUH+mW6r2d+MWa5Y= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= +github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5 
h1:xD/lrqdvwsc+O2bjSSi3YqY73Ke3LAiSCx49aCesA0E= +github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5/go.mod h1:h6jFvWxBdQXxjopDMZyH2UVceIRfR84bdzbkoKrsWNo= +github.com/cockroachdb/errors v1.2.4 h1:Lap807SXTH5tri2TivECb/4abUkMZC9zRoLarvcKDqs= +github.com/cockroachdb/errors v1.2.4/go.mod h1:rQD95gz6FARkaKkQXUksEje/d9a6wBJoCr5oaCLELYA= +github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f h1:o/kfcElHqOiXqcou5a3rIlMc7oJbMQkeLk0VQJ7zgqY= +github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= github.com/containerd/containerd v1.2.7/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= github.com/containerd/containerd v1.4.3 h1:ijQT13JedHSHrQGWFcGEwzcNKrAGIiZ+jSD5QQG07SY= github.com/containerd/containerd v1.4.3/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= +github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= +github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM= github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20181012123002-c6f51f82210d/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd 
v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU= github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/go-systemd/v22 v22.1.0 h1:kq/SbG2BCKLkDKkjQf5OWwKWUKj1lgs3lFI4PxnR5lg= +github.com/coreos/go-systemd/v22 v22.1.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg= github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/cortexproject/cortex v0.6.1-0.20200228110116-92ab6cbe0995/go.mod h1:3Xa3DjJxtpXqxcMGdk850lcIRb81M0fyY1MQ6udY134= github.com/cortexproject/cortex v1.2.1-0.20200805064754-d8edc95e2c91/go.mod h1:PVPxNLrxKH+yc8asaJOxuz7TiRmMizFfnSMOnRzM6oM= @@ -233,9 +246,11 @@ github.com/cortexproject/cortex v1.4.1-0.20201030080541-83ad6df2abea/go.mod h1:k github.com/cortexproject/cortex v1.5.1-0.20201111110551-ba512881b076/go.mod h1:zFBGVsvRBfVp6ARXZ7pmiLaGlbjda5ZnA4Y6qSJyrQg= github.com/cortexproject/cortex v1.6.1-0.20210108144208-6c2dab103f20/go.mod h1:fOsaeeFSyWrjd9nFJO8KVUpsikcxnYsjEzQyjURBoQk= github.com/cortexproject/cortex v1.6.1-0.20210215155036-dfededd9f331/go.mod h1:8bRHNDawVx8te5lIqJ+/AcNTyfosYNC34Qah7+jX/8c= -github.com/cortexproject/cortex v1.7.1-0.20210224085859-66d6fb5b0d42 h1:lcbtnGliuDknToLbaOEK5FixVxgpg+Ty5CDfO0akMO8= github.com/cortexproject/cortex v1.7.1-0.20210224085859-66d6fb5b0d42/go.mod h1:u2dxcHInYbe45wxhLoWVdlFJyDhXewsMcxtnbq/QbH4= +github.com/cortexproject/cortex v1.7.1-0.20210316085356-3fedc1108a49 h1:zyEx414SBEmu5CypoQ+DKohSK9pc0UGMKeh90KHE1jc= +github.com/cortexproject/cortex 
v1.7.1-0.20210316085356-3fedc1108a49/go.mod h1:/DBOW8TzYBTE/U+O7Whs7i7E2eeeZl1iRVDtIqxn5kg= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= @@ -256,15 +271,17 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/denisenkom/go-mssqldb v0.0.0-20190515213511-eb9f6a1743f3/go.mod h1:zAg7JM8CkOJ43xKXIj7eRO9kmWm/TW578qo+oDO6tuM= -github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-bitstream v0.0.0-20180413035011-3522498ce2c8/go.mod h1:VMaSuZ+SZcx/wljOQKvp5srsbCiKDEb6K2wC4+PiBmQ= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dgryski/go-sip13 v0.0.0-20200911182023-62edffca9245/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dhui/dktest v0.3.0/go.mod h1:cyzIUfGsBEbZ6BT7tnXqAShHSXCZhSNmFl70sZ7c1yc= 
github.com/digitalocean/godo v1.57.0 h1:uCpe0sRIZ/sJWxWDsJyBPBjUfSvxop+WHkHiSf+tjjM= github.com/digitalocean/godo v1.57.0/go.mod h1:p7dOjjtSBqCTUksqtA5Fd3uaKs9kyTq2xcz76ulEJRU= +github.com/dnaeon/go-vcr v1.0.1 h1:r8L/HqC0Hje5AXMu1ooW8oyQyOFv4GxqpL0nRP7SLLY= +github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E= github.com/docker/distribution v2.7.0+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/distribution v2.7.1+incompatible h1:a5mlkVzth6W5A4fOsS3D2EO5BUmsJpcB+cRlLU7cSug= github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= @@ -325,6 +342,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsouza/fake-gcs-server v1.7.0/go.mod h1:5XIRs4YvwNbNoz+1JF8j6KLAyDh7RHGAyAK3EP2EsNk= +github.com/getsentry/raven-go v0.2.0 h1:no+xWJRb5ZI7eE8TWgIq1jLulQiIoLG0IfYxv5JYMGs= +github.com/getsentry/raven-go v0.2.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/globalsign/mgo v0.0.0-20180905125535-1ca0a4f7cbcb/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= @@ -448,6 +467,8 @@ github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk= github.com/go-stack/stack v1.8.0/go.mod 
h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/go-zookeeper/zk v1.0.2 h1:4mx0EYENAdX/B/rbunjlt5+4RTA/a9SMHBRuSKdGxPM= +github.com/go-zookeeper/zk v1.0.2/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw= github.com/gobuffalo/attrs v0.0.0-20190224210810-a9411de4debd/go.mod h1:4duuawTqi2wkkpB4ePgWMaai6/Kc6WEz83bhFwpHzj0= github.com/gobuffalo/depgen v0.0.0-20190329151759-d478694a28d3/go.mod h1:3STtPUQYuzV0gBVOY3vy6CfMm/ljR4pABfrTeHNLHUY= github.com/gobuffalo/depgen v0.1.0/go.mod h1:+ifsuy7fhi15RWncXQQKjWS9JPkdah5sZvtHc2RXGlg= @@ -482,6 +503,7 @@ github.com/gocql/gocql v0.0.0-20190301043612-f6df8288f9b4/go.mod h1:4Fw1eo5iaEhD github.com/gocql/gocql v0.0.0-20200121121104-95d072f1b5bb/go.mod h1:DL0ekTmBSTdlNF25Orwt/JMzqIq3EJ4MVa/J/uK64OY= github.com/gocql/gocql v0.0.0-20200526081602-cd04bd7f22a7/go.mod h1:DL0ekTmBSTdlNF25Orwt/JMzqIq3EJ4MVa/J/uK64OY= github.com/godbus/dbus v0.0.0-20190402143921-271e53dc4968/go.mod h1:/YcGZj5zSblfDWMMoOzV4fas9FZnQYTkDnsGvmh2Grw= +github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gofrs/uuid v3.3.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= github.com/gogo/googleapis v1.1.0 h1:kFkMAZBNAn4j7K0GiZr8cRYzejq68VbheufiV3YuyFI= github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= @@ -502,6 +524,7 @@ github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGw github.com/golang/geo v0.0.0-20190916061304-5b978397cfec/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache 
v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -535,8 +558,9 @@ github.com/golang/snappy v0.0.0-20170215233205-553a64147049/go.mod h1:/XxbfmMg8l github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/golang/snappy v0.0.3-0.20201103224600-674baa8c7fc3 h1:ur2rms48b3Ep1dxh7aUV2FZEQ8jEVO2F6ILKx8ofkAg= github.com/golang/snappy v0.0.3-0.20201103224600-674baa8c7fc3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/gomodule/redigo v1.8.4/go.mod h1:P9dn9mFrCBvWhGE1wpxx6fgq7BAeLBk+UUUzlpkBYO0= github.com/gomodule/redigo v2.0.0+incompatible h1:K/R+8tc58AaqLkqG2Ol3Qk+DR/TlNuhuh457pBFPtt0= github.com/gomodule/redigo v2.0.0+incompatible/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= @@ -607,12 +631,15 @@ github.com/gorilla/mux v1.7.1/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2z github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw= github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/websocket 
v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= -github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= +github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-middleware v1.1.0 h1:THDBEeQ9xZ8JEaCLyLQqXMMdRqNr0QAUJTIkQAUtFjg= github.com/grpc-ecosystem/go-grpc-middleware v1.1.0/go.mod h1:f5nM7jw/oeRSadq3xCzHAvxcr8HZnzsqU6ILg/0NiiE= +github.com/grpc-ecosystem/go-grpc-middleware v1.2.2 h1:FlFbCRLd5Jr4iYXZufAvgWN6Ao0JrI5chLINnUXDDr0= +github.com/grpc-ecosystem/go-grpc-middleware v1.2.2/go.mod h1:EaizFBKfUKtMIF5iaDEhniwNedqGo9FuLFzppDr3uwI= github.com/grpc-ecosystem/go-grpc-middleware/providers/kit/v2 v2.0.0-20201002093600-73cf2ae9d891 h1:RhOqTAECcPnehv3hKlYy1fAnpQ7rnZu58l3mpq6gT1k= github.com/grpc-ecosystem/go-grpc-middleware/providers/kit/v2 v2.0.0-20201002093600-73cf2ae9d891/go.mod h1:516cTXxZzi4NBUBbKcwmO4Eqbb6GHAEd3o4N+GYyCBY= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-20200501113911-9a95f0fdbfea/go.mod h1:GugMBs30ZSAkckqXEAIEGyYdDH6EgqowG8ppA3Zt+AY= @@ -621,16 +648,21 @@ github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2.0.20201207153454-9f6 github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= 
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.4.1/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw= +github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.12.1/go.mod h1:8XEsbTttt/W+VvjtQhLACqCisSPWTxCZ7sBRjU6iH9c= +github.com/grpc-ecosystem/grpc-gateway v1.14.6/go.mod h1:zdiPV4Yse/1gnckTHtghG4GkDEdKCRJduHpTxT3/jcw= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/grpc-ecosystem/grpc-opentracing v0.0.0-20180507213350-8e809c8a8645/go.mod h1:6iZfnjpejD4L/4DwD7NryNaJyCQdzwWwH2MWhCA90Kw= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= +github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= github.com/hashicorp/consul/api v1.5.0/go.mod h1:LqwrLNW876eYSuUOo4ZLHBcdKc038txr/IMfbLPATa4= github.com/hashicorp/consul/api v1.7.0/go.mod h1:1NSuaUUkFaJzMasbfq/11wKYWSR67Xn6r2DXKhuDNFg= github.com/hashicorp/consul/api v1.8.1 h1:BOEQaMWoGMhmQ29fC26bi0qb7/rId9JzZP2V0Xmx7m8= github.com/hashicorp/consul/api v1.8.1/go.mod h1:sDjTOq0yUyv5G4h+BqSea7Fn6BU+XbolEz1952UB+mk= +github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= github.com/hashicorp/consul/sdk 
v0.5.0/go.mod h1:fY08Y9z5SvJqevyZNy6WWPXiG3KwBPAvlcdx16zZ0fM= github.com/hashicorp/consul/sdk v0.6.0/go.mod h1:fY08Y9z5SvJqevyZNy6WWPXiG3KwBPAvlcdx16zZ0fM= @@ -671,6 +703,7 @@ github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ github.com/hashicorp/golang-lru v0.5.3/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/mdns v1.0.1/go.mod h1:4gW7WsVCke5TE7EPeYliwHlRUyBtfCwuFwuMg2DmyNY= @@ -718,8 +751,9 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfC github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901 h1:rp+c0RAYOWj8l6qbCUTSiRLG/iKnW3K3/QfPPuSsBt4= github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901/go.mod h1:Z86h9688Y0wesXCyonoVr47MasHilkuLMqGhRZ4Hpak= github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= -github.com/jonboulle/clockwork v0.1.0 h1:VKV+ZcuP6l3yW9doeqz6ziZGgcynBVQO+obU0+0hcPo= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= +github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ= +github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= @@ -765,11 +799,11 @@ github.com/knq/sysutil v0.0.0-20191005231841-15668db23d08 h1:V0an7KRw92wmJysvFvt github.com/knq/sysutil v0.0.0-20191005231841-15668db23d08/go.mod h1:dFWs1zEqDjFtnBXsd1vPOZaLsESovai349994nHx3e0= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.0.0/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= @@ -796,6 +830,7 @@ github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0U github.com/lovoo/gcloud-opentracing v0.3.0 h1:nAeKG70rIsog0TelcEtt6KU0Y1s5qXtsDLnHp0urPLU= github.com/lovoo/gcloud-opentracing v0.3.0/go.mod h1:ZFqk2y38kMDDikZPAK7ynTTGuyt17nSPdS3K5e+ZTBY= github.com/lufia/iostat v1.1.0/go.mod h1:rEPNA0xXgjHQjuI5Cy05sLlS2oRcSlWHRLrvh/AQ+Pg= +github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= 
github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= @@ -947,8 +982,9 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.0.1 h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVojFA6h/TRcI= github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= -github.com/opentracing-contrib/go-grpc v0.0.0-20180928155321-4b5a12d3ff02 h1:0R5mDLI66Qw13qN80TRz85zthQ2nf2+uDyiV23w6c3Q= github.com/opentracing-contrib/go-grpc v0.0.0-20180928155321-4b5a12d3ff02/go.mod h1:JNdpVEzCpXBgIiv4ds+TzhN1hrtxq6ClLrTlT9OQRSc= +github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e h1:4cPxUYdgaGzZIT5/j0IfqOrrXmq6bG8AwvwisMXpdrg= +github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e/go.mod h1:DYR5Eij8rJl8h7gblRrOZ8g0kW1umSpKqYIBTgeDtLo= github.com/opentracing-contrib/go-observer v0.0.0-20170622124052-a52f23424492/go.mod h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis= github.com/opentracing-contrib/go-stdlib v0.0.0-20190519235532-cf7a6c988dc9/go.mod h1:PLldrQSroqzH70Xl+1DQcGnefIbqsKR7UDaiux3zV+w= github.com/opentracing-contrib/go-stdlib v1.0.0 h1:TBS7YuVotp8myLon4Pv7BtCBzOTo1DeZCld0Z63mW2w= @@ -971,6 +1007,7 @@ github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0Mw github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/paulbellamy/ratecounter v0.2.0/go.mod 
h1:Hfx1hDpSGoqxkVVpBi/IlYD7kChlfo5C6hzIHwPqfFE= github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= +github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pelletier/go-toml v1.4.0/go.mod h1:PN7xzY2wHTK0K9p34ErDQMlFxa51Fk0OUruD3k1mMwo= github.com/pelletier/go-toml v1.7.0/go.mod h1:vwGMzjaWMwyfHwgIBhI2YUM4fB6nL6lVAvS1LBMMhTE= github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= @@ -993,18 +1030,21 @@ github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSg github.com/prometheus/alertmanager v0.19.0/go.mod h1:Eyp94Yi/T+kdeb2qvq66E3RGuph5T/jm/RBVh4yz1xo= github.com/prometheus/alertmanager v0.21.0/go.mod h1:h7tJ81NA0VLWvWEayi1QltevFkLF3KxmC/malTcT8Go= github.com/prometheus/alertmanager v0.21.1-0.20200911160112-1fdff6b3f939/go.mod h1:imXRHOP6QTsE0fFsIsAV/cXimS32m7gVZOiUj11m6Ig= -github.com/prometheus/alertmanager v0.21.1-0.20201106142418-c39b78780054 h1:NgCRBfzDpyIhX6Pjh7XSWPHUC8T5dA1yVuK/gwXM7Jw= github.com/prometheus/alertmanager v0.21.1-0.20201106142418-c39b78780054/go.mod h1:imXRHOP6QTsE0fFsIsAV/cXimS32m7gVZOiUj11m6Ig= +github.com/prometheus/alertmanager v0.21.1-0.20210310093010-0f9cab6991e6 h1:WeazuhFA+g8Xce5wgqskDP+b48oQKk7smH72dxO2beA= +github.com/prometheus/alertmanager v0.21.1-0.20210310093010-0f9cab6991e6/go.mod h1:MTqVn+vIupE0dzdgo+sMcNCp37SCAi8vPrvKTTnTz9g= github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM= github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod 
h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= +github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g= github.com/prometheus/client_golang v1.2.1/go.mod h1:XMU6Z2MjaRKVu/dC1qupJI9SiNkDYzz3xecMgSW/F+U= github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeDPbaTKGT+JTgUa3og= github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.4.1/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= +github.com/prometheus/client_golang v1.5.1/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.6.0/go.mod h1:ZLOG9ck3JLRdB5MgO8f+lLTe83AXG6ro35rLTxvnIl4= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.8.0/go.mod h1:O9VU6huf47PktckDQfMTX0Y8tY0/7TSWwj+ITvv0TnM= @@ -1019,8 +1059,10 @@ github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6T github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common v0.0.0-20180518154759-7600349dcfe1/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.0.0-20181126121408-4724e9255275/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.2.0/go.mod 
h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+Zk0j9GMYc= github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= @@ -1030,8 +1072,10 @@ github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB8 github.com/prometheus/common v0.11.1/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= github.com/prometheus/common v0.12.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= github.com/prometheus/common v0.14.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= -github.com/prometheus/common v0.15.0 h1:4fgOnadei3EZvgRwxJ7RMpG1k1pOZth5Pc13tyspaKM= github.com/prometheus/common v0.15.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= +github.com/prometheus/common v0.18.0 h1:WCVKW7aL6LEe1uryfI9dnEc2ZqNB1Fn0ok930v0iL1Y= +github.com/prometheus/common v0.18.0/go.mod h1:U+gB1OBLb1lF3O42bTCL+FK18tX9Oar16Clt/msog/s= +github.com/prometheus/exporter-toolkit v0.5.0/go.mod h1:OCkM4805mmisBhLmVFw858QYi3v0wKdY6/UxrT0pZVg= github.com/prometheus/exporter-toolkit v0.5.1/go.mod h1:OCkM4805mmisBhLmVFw858QYi3v0wKdY6/UxrT0pZVg= github.com/prometheus/node_exporter v1.0.0-rc.0.0.20200428091818-01054558c289 h1:dTUS1vaLWq+Y6XKOTnrFpoVsQKLCbCp1OLj24TDi7oM= github.com/prometheus/node_exporter v1.0.0-rc.0.0.20200428091818-01054558c289/go.mod h1:FGbBv5OPKjch+jNUJmEQpMZytIdyW0NdBtWFcfSKusc= @@ -1040,6 +1084,7 @@ github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= 
github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20190425082905-87a4384529e0/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= github.com/prometheus/procfs v0.0.5/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= @@ -1049,8 +1094,9 @@ github.com/prometheus/procfs v0.0.11/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4 github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.2.0 h1:wH4vA7pcjKuZzjF7lM8awk4fnuJO6idemZXoKnULUx4= github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= -github.com/prometheus/prometheus v1.8.2-0.20210215121130-6f488061dfb4 h1:EbUBvqL6oYUwL6IAI4OzxM9GYbRE+/N+maV/w5+v6Ac= -github.com/prometheus/prometheus v1.8.2-0.20210215121130-6f488061dfb4/go.mod h1:NAYujktP0dmSSpeV155mtnwX2pndLpVVK/Ps68R01TA= +github.com/prometheus/prometheus v1.8.2-0.20210315220929-1cba1741828b h1:TTOvmIV3W6IUIj3pYFs9gfCgueHlriLStMGBsnNdEX4= +github.com/prometheus/prometheus v1.8.2-0.20210315220929-1cba1741828b/go.mod h1:MS/bpdil77lPbfQeKk6OqVQ9OLnpN3Rszd0hka0EOWE= +github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/rafaeljusto/redigomock v0.0.0-20190202135759-257e089e14a1/go.mod h1:JaY6n2sDr+z2WTsXkOmNRUfDy6FN0L6Nk7x06ndm4tY= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/retailnext/hllpp 
v1.0.1-0.20180308014038-101a6d2f8b52/go.mod h1:RDpi1RftBQPUCDRw6SmxeaREsAaRKnOclghuzp/WRzc= @@ -1067,13 +1113,13 @@ github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= -github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414 h1:AJNDS0kP60X8wwWFvbLPwDuojxubj9pbfK7pjHw0vKg= -github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= github.com/santhosh-tekuri/jsonschema v1.2.4 h1:hNhW8e7t+H1vgY+1QeEQpveR6D4+OwKPXCfD2aieJis= github.com/santhosh-tekuri/jsonschema v1.2.4/go.mod h1:TEAUOeZSmIxTTuHatJzrvARHiuO9LYd+cIxzgEHCQI4= github.com/satori/go.uuid v0.0.0-20160603004225-b111a074d5ef/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= +github.com/scaleway/scaleway-sdk-go v1.0.0-beta.7.0.20210223165440-c65ae3540d44 h1:3egqo0Vut6daANFm7tOXdNAa8v5/uLU+sgCJrc88Meo= +github.com/scaleway/scaleway-sdk-go v1.0.0-beta.7.0.20210223165440-c65ae3540d44/go.mod h1:CJJ5VAbozOl0yEw7nHB9+7BXTJbIn6h7W+f6Gau5IP8= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e/go.mod h1:tm/wZFQ8e24NYaBGIlnO2WGCAi67re4HHuOm0sftE/M= @@ -1098,30 
+1144,36 @@ github.com/sirupsen/logrus v1.4.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.5.0/go.mod h1:+F7Ogzej0PZc/94MaYx/nvG9jOFMD2osvC3s+Squfpo= -github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= +github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM= +github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v1.0.1 h1:voD4ITNjPL5jjBfgR/r8fPIIBrliWrWHeiJApdr3r4w= github.com/smartystreets/assertions v1.0.1/go.mod h1:kHHU4qYBaI3q23Pp3VPrmWhuIUrLW/7eUrw0BU5VaoM= github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= +github.com/soheilhy/cmux v0.1.5-0.20210205191134-5ec6847320e5 h1:GJTW+uNMIV1RKwox+T4aN0/sQlYRg78uHZf2H0aBcDw= +github.com/soheilhy/cmux v0.1.5-0.20210205191134-5ec6847320e5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= github.com/sony/gobreaker v0.4.1 h1:oMnRNZXX5j85zso6xCPRNPtmAycat+WcoKbklScLDgQ= github.com/sony/gobreaker 
v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= github.com/soundcloud/go-runit v0.0.0-20150630195641-06ad41a06c4a/go.mod h1:LeFCbQYJ3KJlPs/FvPz2dy1tkpxyeNESVyCNNzRXFR0= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= +github.com/spf13/cobra v1.1.1/go.mod h1:WnodtKOvamDL/PwE2M4iKs8aMDBZ5Q5klgD3qfVJQMI= +github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod 
h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= @@ -1138,6 +1190,7 @@ github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/thanos-io/thanos v0.8.1-0.20200109203923-552ffa4c1a0d/go.mod h1:usT/TxtJQ7DzinTt+G9kinDQmRS5sxwu0unVKZ9vdcw= github.com/thanos-io/thanos v0.13.1-0.20200731083140-69b87607decf/go.mod h1:G8caR6G7pSDreRDvFm9wFuyjEBztmr8Ag3kBYpa/fEc= github.com/thanos-io/thanos v0.13.1-0.20200807203500-9b578afb4763/go.mod h1:KyW0a93tsh7v4hXAwo2CVAIRYuZT1Kkf4e04gisQjAg= @@ -1146,6 +1199,7 @@ github.com/thanos-io/thanos v0.13.1-0.20201030101306-47f9a225cc52/go.mod h1:OqqX github.com/thanos-io/thanos v0.13.1-0.20210108102609-f85e4003ba51/go.mod h1:kPvI4H0AynFiHDN95ZB28/k70ZPGCx+pBrRh6RZPimw= github.com/thanos-io/thanos v0.13.1-0.20210204123931-82545cdd16fe/go.mod h1:ZLDGYRNkgM+FCwYNOD+6tOV+DE2fpjzfV6iqXyOgFIw= github.com/thanos-io/thanos v0.13.1-0.20210224074000-659446cab117/go.mod h1:kdqFpzdkveIKpNNECVJd75RPvgsAifQgJymwCdfev1w= +github.com/thanos-io/thanos v0.13.1-0.20210226164558-03dace0a1aa1/go.mod h1:gMCy4oCteKTT7VuXVvXLTPGzzjovX1VPE5p+HgL1hyU= github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab h1:7ZR3hmisBWw77ZpO1/o86g+JV3VKlk3d48jopJxzTjU= github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab/go.mod h1:eheTFp954zcWZXCU8d0AT76ftsQOTo4DTqkN/h3k1MY= github.com/tidwall/pretty v0.0.0-20180105212114-65a9db5fad51/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= @@ -1153,8 +1207,9 @@ github.com/tidwall/pretty v1.0.0 
h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tinylib/msgp v1.0.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5 h1:LnC5Kc/wtumK+WB441p7ynQJzVuNRJiqddSIE3IlSEQ= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 h1:j6JEOq5QWFker+d7mFQYOhjTZonQ7YkLTHm56dbn+yM= +github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/uber/jaeger-client-go v2.15.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/uber/jaeger-client-go v2.20.1+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= @@ -1201,13 +1256,28 @@ go.elastic.co/apm/module/apmot v1.5.0 h1:rPyHRI6Ooqjwny67au6e2eIxLZshqd7bJfAUpdg go.elastic.co/apm/module/apmot v1.5.0/go.mod h1:d2KYwhJParTpyw2WnTNy8geNlHKKFX+4oK3YLlsesWE= go.elastic.co/fastjson v1.0.0 h1:ooXV/ABvf+tBul26jcVViPT3sBir0PvXgibYB1IQQzg= go.elastic.co/fastjson v1.0.0/go.mod h1:PmeUOMMtLHQr9ZS9J9owrAVg0FkaZDRZJEFTTGHtchs= +go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.etcd.io/bbolt v1.3.5-0.20200615073812-232d8fc87f50 h1:ASw9n1EHMftwnP3Az4XW6e308+gNsrHzmdhd0Olz9Hs= go.etcd.io/bbolt v1.3.5-0.20200615073812-232d8fc87f50/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= 
+go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0= +go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= go.etcd.io/etcd v0.0.0-20190709142735-eb7dd97135a5/go.mod h1:N0RPWo9FXJYZQI4BTkDtQylrstIigYHeR18ONnyTufk= go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= go.etcd.io/etcd v0.5.0-alpha.5.0.20200520232829-54ba9589114f h1:pBCD+Z7cy5WPTq+R6MmJJvDRpn88cp7bmTypBsn91g4= go.etcd.io/etcd v0.5.0-alpha.5.0.20200520232829-54ba9589114f/go.mod h1:skWido08r9w6Lq/w70DO5XYIKMu4QFu1+4VsqLQuJy8= +go.etcd.io/etcd/api/v3 v3.5.0-alpha.0 h1:+e5nrluATIy3GP53znpkHMFzPTHGYyzvJGFCbuI6ZLc= +go.etcd.io/etcd/api/v3 v3.5.0-alpha.0/go.mod h1:mPcW6aZJukV6Aa81LSKpBjQXTWlXB5r74ymPoSWa3Sw= +go.etcd.io/etcd/client/v2 v2.305.0-alpha.0 h1:jZepGpOeJATxsbMNBZczDS2jHdK/QVHM1iPe9jURJ8o= +go.etcd.io/etcd/client/v2 v2.305.0-alpha.0/go.mod h1:kdV+xzCJ3luEBSIeQyB/OEKkWKd8Zkux4sbDeANrosU= +go.etcd.io/etcd/client/v3 v3.5.0-alpha.0/go.mod h1:wKt7jgDgf/OfKiYmCq5WFGxOFAkVMLxiiXgLDFhECr8= +go.etcd.io/etcd/client/v3 v3.5.0-alpha.0.0.20210225194612-fa82d11a958a h1:GZLxiPIaZ/U1Mez9rw3BqUHKt3y3+CK4HWtGAG0Pfx0= +go.etcd.io/etcd/client/v3 v3.5.0-alpha.0.0.20210225194612-fa82d11a958a/go.mod h1:wKt7jgDgf/OfKiYmCq5WFGxOFAkVMLxiiXgLDFhECr8= +go.etcd.io/etcd/pkg/v3 v3.5.0-alpha.0 h1:3yLUEC0nFCxw/RArImOyRUI4OAFbg4PFpBbAhSNzKNY= +go.etcd.io/etcd/pkg/v3 v3.5.0-alpha.0/go.mod h1:tV31atvwzcybuqejDoY3oaNRTtlD2l/Ot78Pc9w7DMY= +go.etcd.io/etcd/raft/v3 v3.5.0-alpha.0 h1:DvYJotxV9q1Lkn7pknzAbFO/CLtCVidCr2K9qRLJ8pA= +go.etcd.io/etcd/raft/v3 v3.5.0-alpha.0/go.mod h1:FAwse6Zlm5v4tEWZaTjmNhe17Int4Oxbu7+2r0DiD3w= +go.etcd.io/etcd/server/v3 v3.5.0-alpha.0.0.20210225194612-fa82d11a958a h1:s40lP1ShwqY7NhqXcgAC5iq1B3EOQTpmbvOFl+o6ZxE= +go.etcd.io/etcd/server/v3 v3.5.0-alpha.0.0.20210225194612-fa82d11a958a/go.mod h1:tsKetYpt980ZTpzl/gb+UOJj9RkIyCb1u4wjzMg90BQ= go.mongodb.org/mongo-driver v1.0.3/go.mod 
h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= go.mongodb.org/mongo-driver v1.1.0/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= go.mongodb.org/mongo-driver v1.1.1/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= @@ -1250,8 +1320,9 @@ go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9E go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= -go.uber.org/zap v1.14.1 h1:nYDKopTbvAPq/NrUVZwT15y2lpROBiLLyoRTbXOYWOo= go.uber.org/zap v1.14.1/go.mod h1:Mb2vm2krFEG5DV0W9qcHBYFtp/Wku1cvYaqPsS/WYfc= +go.uber.org/zap v1.16.0 h1:uFRZXykJGK9lLY4HtgSw44DnIcAM+kRBP7x5m+NpAOM= +go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= golang.org/x/crypto v0.0.0-20180608092829-8ac0e0d97ce4/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -1291,6 +1362,7 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= +golang.org/x/exp v0.0.0-20200331195152-e8c3332aa8e5/go.mod h1:4M0jN8W1tt0AVLNr8HDosyJCDCDuyL9N9+3m7wDWgKw= golang.org/x/exp v0.0.0-20200821190819-94841d0725da/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image 
v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= @@ -1342,6 +1414,7 @@ golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190921015927-1a5e07d1ff72/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191002035440-2ec189313ef0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -1355,6 +1428,7 @@ golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= @@ -1472,6 +1546,7 @@ golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200831180312-196b9ba8737a/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201009025420-dfb3f7c4e634/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201015000850-e3ed0017c211/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201112073958-5cba982894dd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1535,6 +1610,7 @@ golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191111182352-50fa39b762bc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191112195655-aa38f8e97acc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -1568,6 +1644,7 @@ golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE= 
+golang.org/x/tools v0.0.0-20201014170642-d1624618ad65/go.mod h1:z6u4i615ZeAfBE4XtMziQW1fSVJXACjjbWkB/mvPzlU= golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= @@ -1646,6 +1723,7 @@ google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfG google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= diff --git a/mixin/README.md b/mixin/README.md index 0720caf2415..8bc4e30d0b6 100644 --- a/mixin/README.md +++ b/mixin/README.md @@ -59,48 +59,71 @@ This project is intended to be used as a library. You can extend and customize d [embedmd]:# (config.libsonnet) ```libsonnet { + local thanos = self, + // TargetGroups is a way to help mixin users to add high level target grouping to their alerts and dashboards. + // With the help of TargetGroups you can use a single observability stack to monitor several Thanos instances. + // The key in the key-value pair will be used as "label name" in the alerts and variable name in the dashboards. 
+ // The value in the key-value pair will be used as a query to fetch available values for the given label name. + targetGroups+:: { + // For example for given following groups, + // namespace: 'thanos_status', + // cluster: 'find_mi_cluster_bitte', + // zone: 'an_i_in_da_zone', + // region: 'losing_my_region', + // will generate queriers for the alerts as follows: + // ( + // sum by (cluster, namespace, region, zone, job) (rate(thanos_compact_group_compactions_failures_total{job=~"thanos-compact.*"}[5m])) + // / + // sum by (cluster, namespace, region, zone, job) (rate(thanos_compact_group_compactions_total{job=~"thanos-compact.*"}[5m])) + // * 100 > 5 + // ) + // + // AND for the dashborads: + // + // sum by (cluster, namespace, region, zone, job) (rate(thanos_compact_group_compactions_failures_total{cluster=\"$cluster\", namespace=\"$namespace\", region=\"$region\", zone=\"$zone\", job=\"$job\"}[$interval])) + // / + // sum by (cluster, namespace, region, zone, job) (rate(thanos_compact_group_compactions_total{cluster=\"$cluster\", namespace=\"$namespace\", region=\"$region\", zone=\"$zone\", job=\"$job\"}[$interval])) + }, query+:: { - jobPrefix: 'thanos-query', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-query.*"', title: '%(prefix)sQuery' % $.dashboard.prefix, }, store+:: { - jobPrefix: 'thanos-store', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-store.*"', title: '%(prefix)sStore' % $.dashboard.prefix, }, receive+:: { - jobPrefix: 'thanos-receive', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-receive.*"', title: '%(prefix)sReceive' % $.dashboard.prefix, }, rule+:: { - jobPrefix: 'thanos-rule', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-rule.*"', title: '%(prefix)sRule' % $.dashboard.prefix, }, compact+:: { - jobPrefix: 'thanos-compact', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-compact.*"', title: '%(prefix)sCompact' % 
$.dashboard.prefix, }, sidecar+:: { - jobPrefix: 'thanos-sidecar', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-sidecar.*"', title: '%(prefix)sSidecar' % $.dashboard.prefix, }, + // TODO(kakkoyun): Fix naming convention: bucketReplicate bucket_replicate+:: { - jobPrefix: 'thanos-bucket-replicate', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-bucket-replicate.*"', title: '%(prefix)sBucketReplicate' % $.dashboard.prefix, }, - overview+:: { - title: '%(prefix)sOverview' % $.dashboard.prefix, - }, dashboard+:: { prefix: 'Thanos / ', tags: ['thanos-mixin'], - namespaceQuery: 'thanos_status', + selector: ['%s="$%s"' % [level, level] for level in std.objectFields(thanos.targetGroups)], + dimensions: ['%s' % level for level in std.objectFields(thanos.targetGroups)], + + overview+:: { + title: '%(prefix)sOverview' % $.dashboard.prefix, + selector: std.join(', ', thanos.dashboard.selector), + dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']), + }, }, } ``` diff --git a/mixin/alerts/absent.libsonnet b/mixin/alerts/absent.libsonnet index 1786a53ce44..9a37b69ed09 100644 --- a/mixin/alerts/absent.libsonnet +++ b/mixin/alerts/absent.libsonnet @@ -1,5 +1,7 @@ local capitalize(str) = std.asciiUpper(std.substr(str, 0, 1)) + std.asciiLower(std.substr(str, 1, std.length(str))); local titlize(str) = std.join('', std.map(capitalize, std.split(str, '_'))); + +local components = ['query', 'receive', 'rule', 'compact', 'store', 'bucket_replicate', 'sidecar']; { local thanos = self, @@ -7,10 +9,11 @@ local titlize(str) = std.join('', std.map(capitalize, std.split(str, '_'))); jobs:: { ['Thanos%s' % titlize(component)]: thanos[component].selector for component in std.objectFieldsAll(thanos) - if component != 'jobs' && std.type(thanos[component]) == 'object' && std.objectHas(thanos[component], 'selector') + if component != 'jobs' && std.type(thanos[component]) == 'object' && std.member(components, component) }, 
prometheusAlerts+:: { + local location = if std.length(std.objectFields(thanos.targetGroups)) > 0 then ' from ' + std.join('/', ['{{$labels.%s}}' % level for level in std.objectFields(thanos.targetGroups)]) else '', groups+: [ { name: 'thanos-component-absent', @@ -25,8 +28,8 @@ local titlize(str) = std.join('', std.map(capitalize, std.split(str, '_'))); severity: 'critical', }, annotations: { - description: '%s has disappeared from Prometheus target discovery.' % name, - summary: 'thanos component has disappeared from Prometheus target discovery.', + description: '%s has disappeared%s. Prometheus target for the component cannot be discovered.' % [name, location], + summary: 'Thanos component has disappeared%s.' % location, }, } for name in std.objectFields(thanos.jobs) diff --git a/mixin/alerts/bucket_replicate.libsonnet b/mixin/alerts/bucket_replicate.libsonnet index b4bd6958f5a..551dcb3ba7d 100644 --- a/mixin/alerts/bucket_replicate.libsonnet +++ b/mixin/alerts/bucket_replicate.libsonnet @@ -4,37 +4,25 @@ selector: error 'must provide selector for Thanos Bucket Replicate dashboard', errorThreshold: 10, p99LatencyThreshold: 20, + dimensions: std.join(', ', std.objectFields(thanos.targetGroups) + ['job']), }, prometheusAlerts+:: { groups+: if thanos.bucket_replicate == null then [] else [ + local location = if std.length(std.objectFields(thanos.targetGroups)) > 0 then ' in' + std.join('/', ['{{$labels.%s}}' % level for level in std.objectFields(thanos.targetGroups)]) else ' '; { name: 'thanos-bucket-replicate', rules: [ - { - alert: 'ThanosBucketReplicateIsDown', - expr: ||| - absent(up{%(selector)s}) - ||| % thanos.bucket_replicate, - 'for': '5m', - labels: { - severity: 'critical', - }, - annotations: { - description: 'Thanos Replicate has disappeared from Prometheus target discovery.', - summary: 'Thanos Replicate has disappeared from Prometheus target discovery.', - }, - }, { alert: 'ThanosBucketReplicateErrorRate', annotations: { - description: 'Thanos 
Replicate failing to run, {{ $value | humanize }}% of attempts failed.', - summary: 'Thanose Replicate is failing to run.', + description: 'Thanos Replicate is failing to run%s, {{$value | humanize}}%% of attempts failed.' % location, + summary: 'Thanose Replicate is failing to run in %s.' % location, }, expr: ||| ( - sum(rate(thanos_replicate_replication_runs_total{result="error", %(selector)s}[5m])) - / on (namespace) group_left - sum(rate(thanos_replicate_replication_runs_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_replicate_replication_runs_total{result="error", %(selector)s}[5m])) + / on (%(dimensions)s) group_left + sum by (%(dimensions)s) (rate(thanos_replicate_replication_runs_total{%(selector)s}[5m])) ) * 100 >= %(errorThreshold)s ||| % thanos.bucket_replicate, 'for': '5m', @@ -45,14 +33,14 @@ { alert: 'ThanosBucketReplicateRunLatency', annotations: { - description: 'Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for the replicate operations.', + description: 'Thanos Replicate {{$labels.job}}%shas a 99th percentile latency of {{$value}} seconds for the replicate operations.' 
% location, summary: 'Thanos Replicate has a high latency for replicate operations.', }, expr: ||| ( - histogram_quantile(0.99, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{%(selector)s}[5m]))) > %(p99LatencyThreshold)s + histogram_quantile(0.99, sum by (%(dimensions)s) (rate(thanos_replicate_replication_run_duration_seconds_bucket{%(selector)s}[5m]))) > %(p99LatencyThreshold)s and - sum by (job) (rate(thanos_replicate_replication_run_duration_seconds_bucket{%(selector)s}[5m])) > 0 + sum by (%(dimensions)s) (rate(thanos_replicate_replication_run_duration_seconds_bucket{%(selector)s}[5m])) > 0 ) ||| % thanos.bucket_replicate, 'for': '5m', diff --git a/mixin/alerts/compact.libsonnet b/mixin/alerts/compact.libsonnet index 75bea25b495..fa8b60b33a3 100644 --- a/mixin/alerts/compact.libsonnet +++ b/mixin/alerts/compact.libsonnet @@ -4,19 +4,21 @@ selector: error 'must provide selector for Thanos Compact alerts', compactionErrorThreshold: 5, bucketOpsErrorThreshold: 5, + dimensions: std.join(', ', std.objectFields(thanos.targetGroups) + ['job']), }, prometheusAlerts+:: { groups+: if thanos.compact == null then [] else [ + local location = if std.length(std.objectFields(thanos.targetGroups)) > 0 then ' in ' + std.join('/', ['{{$labels.%s}}' % level for level in std.objectFields(thanos.targetGroups)]) else ' '; { name: 'thanos-compact', rules: [ { alert: 'ThanosCompactMultipleRunning', annotations: { - description: 'No more than one Thanos Compact instance should be running at once. There are {{ $value }}', + description: 'No more than one Thanos Compact instance should be running at once. 
There are {{$value}}%s' % location, summary: 'Thanos Compact has multiple instances running.', }, - expr: 'sum(up{%(selector)s}) > 1' % thanos.compact, + expr: 'sum by (%(dimensions)s) (up{%(selector)s}) > 1' % thanos.compact, 'for': '5m', labels: { severity: 'warning', @@ -25,7 +27,7 @@ { alert: 'ThanosCompactHalted', annotations: { - description: 'Thanos Compact {{$labels.job}} has failed to run and now is halted.', + description: 'Thanos Compact {{$labels.job}} has failed to run%s and now is halted.' % location, summary: 'Thanos Compact has failed to run ans is now halted.', }, expr: 'thanos_compact_halted{%(selector)s} == 1' % thanos.compact, @@ -37,14 +39,14 @@ { alert: 'ThanosCompactHighCompactionFailures', annotations: { - description: 'Thanos Compact {{$labels.job}} is failing to execute {{ $value | humanize }}% of compactions.', + description: 'Thanos Compact {{$labels.job}}%s, is failing to execute {{$value | humanize}}%% of compactions.' % location, summary: 'Thanos Compact is failing to execute compactions.', }, expr: ||| ( - sum by (job) (rate(thanos_compact_group_compactions_failures_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_compact_group_compactions_failures_total{%(selector)s}[5m])) / - sum by (job) (rate(thanos_compact_group_compactions_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_compact_group_compactions_total{%(selector)s}[5m])) * 100 > %(compactionErrorThreshold)s ) ||| % thanos.compact, @@ -56,14 +58,14 @@ { alert: 'ThanosCompactBucketHighOperationFailures', annotations: { - description: 'Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations.', + description: 'Thanos Compact {{$labels.job}}%s, Bucket is failing to execute {{$value | humanize}}%% of operations.' 
% location, summary: 'Thanos Compact Bucket is having a high number of operation failures.', }, expr: ||| ( - sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operation_failures_total{%(selector)s}[5m])) / - sum by (job) (rate(thanos_objstore_bucket_operations_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operations_total{%(selector)s}[5m])) * 100 > %(bucketOpsErrorThreshold)s ) ||| % thanos.compact, @@ -75,10 +77,10 @@ { alert: 'ThanosCompactHasNotRun', annotations: { - description: 'Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours.', + description: 'Thanos Compact {{$labels.job}}%s has not uploaded anything for 24 hours.' % location, summary: 'Thanos Compact has not uploaded anything for last 24 hours.', }, - expr: '(time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{%(selector)s}[24h]))) / 60 / 60 > 24' % thanos.compact, + expr: '(time() - max by (%(dimensions)s) (max_over_time(thanos_objstore_bucket_last_successful_upload_time{%(selector)s}[24h]))) / 60 / 60 > 24' % thanos.compact, labels: { severity: 'warning', }, diff --git a/mixin/alerts/query.libsonnet b/mixin/alerts/query.libsonnet index f765c59c654..f1fb95b7ca4 100644 --- a/mixin/alerts/query.libsonnet +++ b/mixin/alerts/query.libsonnet @@ -7,23 +7,25 @@ dnsErrorThreshold: 1, p99QueryLatencyThreshold: 40, p99QueryRangeLatencyThreshold: 90, + dimensions: std.join(', ', std.objectFields(thanos.targetGroups) + ['job']), }, prometheusAlerts+:: { groups+: if thanos.query == null then [] else [ + local location = if std.length(std.objectFields(thanos.targetGroups)) > 0 then ' in ' + std.join('/', ['{{$labels.%s}}' % level for level in std.objectFields(thanos.targetGroups)]) else ' '; { name: 'thanos-query', rules: [ { alert: 'ThanosQueryHttpRequestQueryErrorRateHigh', annotations: { - description: 'Thanos Query {{$labels.job}} is failing 
to handle {{ $value | humanize }}% of "query" requests.', + description: 'Thanos Query {{$labels.job}}%sis failing to handle {{$value | humanize}}%% of "query" requests.' % location, summary: 'Thanos Query is failing to handle requests.', }, expr: ||| ( - sum(rate(http_requests_total{code=~"5..", %(selector)s, handler="query"}[5m])) + sum by (%(dimensions)s) (rate(http_requests_total{code=~"5..", %(selector)s, handler="query"}[5m])) / - sum(rate(http_requests_total{%(selector)s, handler="query"}[5m])) + sum by (%(dimensions)s) (rate(http_requests_total{%(selector)s, handler="query"}[5m])) ) * 100 > %(httpErrorThreshold)s ||| % thanos.query, 'for': '5m', @@ -34,14 +36,14 @@ { alert: 'ThanosQueryHttpRequestQueryRangeErrorRateHigh', annotations: { - description: 'Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query_range" requests.', + description: 'Thanos Query {{$labels.job}}%sis failing to handle {{$value | humanize}}%% of "query_range" requests.' % location, summary: 'Thanos Query is failing to handle requests.', }, expr: ||| ( - sum(rate(http_requests_total{code=~"5..", %(selector)s, handler="query_range"}[5m])) + sum by (%(dimensions)s) (rate(http_requests_total{code=~"5..", %(selector)s, handler="query_range"}[5m])) / - sum(rate(http_requests_total{%(selector)s, handler="query_range"}[5m])) + sum by (%(dimensions)s) (rate(http_requests_total{%(selector)s, handler="query_range"}[5m])) ) * 100 > %(httpErrorThreshold)s ||| % thanos.query, 'for': '5m', @@ -52,14 +54,14 @@ { alert: 'ThanosQueryGrpcServerErrorRate', annotations: { - description: 'Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.', + description: 'Thanos Query {{$labels.job}}%sis failing to handle {{$value | humanize}}%% of requests.' 
% location, summary: 'Thanos Query is failing to handle requests.', }, expr: ||| ( - sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s}[5m])) + sum by (%(dimensions)s) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s}[5m])) / - sum by (job) (rate(grpc_server_started_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(grpc_server_started_total{%(selector)s}[5m])) * 100 > %(grpcErrorThreshold)s ) ||| % thanos.query, @@ -71,14 +73,14 @@ { alert: 'ThanosQueryGrpcClientErrorRate', annotations: { - description: 'Thanos Query {{$labels.job}} is failing to send {{ $value | humanize }}% of requests.', + description: 'Thanos Query {{$labels.job}}%sis failing to send {{$value | humanize}}%% of requests.' % location, summary: 'Thanos Query is failing to send requests.', }, expr: ||| ( - sum by (job) (rate(grpc_client_handled_total{grpc_code!="OK", %(selector)s}[5m])) + sum by (%(dimensions)s) (rate(grpc_client_handled_total{grpc_code!="OK", %(selector)s}[5m])) / - sum by (job) (rate(grpc_client_started_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(grpc_client_started_total{%(selector)s}[5m])) ) * 100 > %(grpcErrorThreshold)s ||| % thanos.query, 'for': '5m', @@ -89,14 +91,14 @@ { alert: 'ThanosQueryHighDNSFailures', annotations: { - description: 'Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing DNS queries for store endpoints.', + description: 'Thanos Query {{$labels.job}}%shave {{$value | humanize}}%% of failing DNS queries for store endpoints.' 
% location, summary: 'Thanos Query is having high number of DNS failures.', }, expr: ||| ( - sum by (job) (rate(thanos_query_store_apis_dns_failures_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_query_store_apis_dns_failures_total{%(selector)s}[5m])) / - sum by (job) (rate(thanos_query_store_apis_dns_lookups_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_query_store_apis_dns_lookups_total{%(selector)s}[5m])) ) * 100 > %(dnsErrorThreshold)s ||| % thanos.query, 'for': '15m', @@ -107,14 +109,14 @@ { alert: 'ThanosQueryInstantLatencyHigh', annotations: { - description: 'Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for instant queries.', + description: 'Thanos Query {{$labels.job}}%shas a 99th percentile latency of {{$value}} seconds for instant queries.' % location, summary: 'Thanos Query has high latency for queries.', }, expr: ||| ( - histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{%(selector)s, handler="query"}[5m]))) > %(p99QueryLatencyThreshold)s + histogram_quantile(0.99, sum by (%(dimensions)s, le) (rate(http_request_duration_seconds_bucket{%(selector)s, handler="query"}[5m]))) > %(p99QueryLatencyThreshold)s and - sum by (job) (rate(http_request_duration_seconds_bucket{%(selector)s, handler="query"}[5m])) > 0 + sum by (%(dimensions)s) (rate(http_request_duration_seconds_bucket{%(selector)s, handler="query"}[5m])) > 0 ) ||| % thanos.query, 'for': '10m', @@ -125,14 +127,14 @@ { alert: 'ThanosQueryRangeLatencyHigh', annotations: { - description: 'Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for range queries.', + description: 'Thanos Query {{$labels.job}}%shas a 99th percentile latency of {{$value}} seconds for range queries.' 
% location, summary: 'Thanos Query has high latency for queries.', }, expr: ||| ( - histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{%(selector)s, handler="query_range"}[5m]))) > %(p99QueryRangeLatencyThreshold)s + histogram_quantile(0.99, sum by (%(dimensions)s, le) (rate(http_request_duration_seconds_bucket{%(selector)s, handler="query_range"}[5m]))) > %(p99QueryRangeLatencyThreshold)s and - sum by (job) (rate(http_request_duration_seconds_count{%(selector)s, handler="query_range"}[5m])) > 0 + sum by (%(dimensions)s) (rate(http_request_duration_seconds_count{%(selector)s, handler="query_range"}[5m])) > 0 ) ||| % thanos.query, 'for': '10m', diff --git a/mixin/alerts/receive.libsonnet b/mixin/alerts/receive.libsonnet index 034bb26e46b..4e6006edf7a 100644 --- a/mixin/alerts/receive.libsonnet +++ b/mixin/alerts/receive.libsonnet @@ -6,23 +6,25 @@ forwardErrorThreshold: 20, refreshErrorThreshold: 0, p99LatencyThreshold: 10, + dimensions: std.join(', ', std.objectFields(thanos.targetGroups) + ['job']), }, prometheusAlerts+:: { groups+: if thanos.receive == null then [] else [ + local location = if std.length(std.objectFields(thanos.targetGroups)) > 0 then ' in ' + std.join('/', ['{{$labels.%s}}' % level for level in std.objectFields(thanos.targetGroups)]) else ' '; { name: 'thanos-receive', rules: [ { alert: 'ThanosReceiveHttpRequestErrorRateHigh', annotations: { - description: 'Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.', + description: 'Thanos Receive {{$labels.job}}%sis failing to handle {{$value | humanize}}%% of requests.' 
% location, summary: 'Thanos Receive is failing to handle requests.', }, expr: ||| ( - sum(rate(http_requests_total{code=~"5..", %(selector)s, handler="receive"}[5m])) + sum by (%(dimensions)s) (rate(http_requests_total{code=~"5..", %(selector)s, handler="receive"}[5m])) / - sum(rate(http_requests_total{%(selector)s, handler="receive"}[5m])) + sum by (%(dimensions)s) (rate(http_requests_total{%(selector)s, handler="receive"}[5m])) ) * 100 > %(httpErrorThreshold)s ||| % thanos.receive, 'for': '5m', @@ -33,14 +35,14 @@ { alert: 'ThanosReceiveHttpRequestLatencyHigh', annotations: { - description: 'Thanos Receive {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for requests.', + description: 'Thanos Receive {{$labels.job}}%shas a 99th percentile latency of {{ $value }} seconds for requests.' % location, summary: 'Thanos Receive has high HTTP requests latency.', }, expr: ||| ( - histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{%(selector)s, handler="receive"}[5m]))) > %(p99LatencyThreshold)s + histogram_quantile(0.99, sum by (%(dimensions)s, le) (rate(http_request_duration_seconds_bucket{%(selector)s, handler="receive"}[5m]))) > %(p99LatencyThreshold)s and - sum by (job) (rate(http_request_duration_seconds_count{%(selector)s, handler="receive"}[5m])) > 0 + sum by (%(dimensions)s) (rate(http_request_duration_seconds_count{%(selector)s, handler="receive"}[5m])) > 0 ) ||| % thanos.receive, 'for': '10m', @@ -51,7 +53,7 @@ { alert: 'ThanosReceiveHighReplicationFailures', annotations: { - description: 'Thanos Receive {{$labels.job}} is failing to replicate {{ $value | humanize }}% of requests.', + description: 'Thanos Receive {{$labels.job}}%sis failing to replicate {{$value | humanize}}%% of requests.' 
% location, summary: 'Thanos Receive is having high number of replication failures.', }, expr: ||| @@ -59,15 +61,15 @@ and ( ( - sum by (job) (rate(thanos_receive_replications_total{result="error", %(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_receive_replications_total{result="error", %(selector)s}[5m])) / - sum by (job) (rate(thanos_receive_replications_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_receive_replications_total{%(selector)s}[5m])) ) > ( - max by (job) (floor((thanos_receive_replication_factor{%(selector)s}+1) / 2)) + max by (%(dimensions)s) (floor((thanos_receive_replication_factor{%(selector)s}+1) / 2)) / - max by (job) (thanos_receive_hashring_nodes{%(selector)s}) + max by (%(dimensions)s) (thanos_receive_hashring_nodes{%(selector)s}) ) ) * 100 ||| % thanos.receive, @@ -79,14 +81,14 @@ { alert: 'ThanosReceiveHighForwardRequestFailures', annotations: { - description: 'Thanos Receive {{$labels.job}} is failing to forward {{ $value | humanize }}% of requests.', + description: 'Thanos Receive {{$labels.job}}%sis failing to forward {{$value | humanize}}%% of requests.' 
% location, summary: 'Thanos Receive is failing to forward requests.', }, expr: ||| ( - sum by (job) (rate(thanos_receive_forward_requests_total{result="error", %(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_receive_forward_requests_total{result="error", %(selector)s}[5m])) / - sum by (job) (rate(thanos_receive_forward_requests_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_receive_forward_requests_total{%(selector)s}[5m])) ) * 100 > %(forwardErrorThreshold)s ||| % thanos.receive, 'for': '5m', @@ -97,14 +99,14 @@ { alert: 'ThanosReceiveHighHashringFileRefreshFailures', annotations: { - description: 'Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{ $value | humanize }} of attempts failed.', + description: 'Thanos Receive {{$labels.job}}%sis failing to refresh hashring file, {{$value | humanize}} of attempts failed.' % location, summary: 'Thanos Receive is failing to refresh hasring file.', }, expr: ||| ( - sum by (job) (rate(thanos_receive_hashrings_file_errors_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_receive_hashrings_file_errors_total{%(selector)s}[5m])) / - sum by (job) (rate(thanos_receive_hashrings_file_refreshes_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_receive_hashrings_file_refreshes_total{%(selector)s}[5m])) > %(refreshErrorThreshold)s ) ||| % thanos.receive, @@ -116,10 +118,10 @@ { alert: 'ThanosReceiveConfigReloadFailure', annotations: { - description: 'Thanos Receive {{$labels.job}} has not been able to reload hashring configurations.', + description: 'Thanos Receive {{$labels.job}}%shas not been able to reload hashring configurations.' 
% location, summary: 'Thanos Receive has not been able to reload configuration.', }, - expr: 'avg(thanos_receive_config_last_reload_successful{%(selector)s}) by (job) != 1' % thanos.receive, + expr: 'avg by (%(dimensions)s) (thanos_receive_config_last_reload_successful{%(selector)s}) != 1' % thanos.receive, 'for': '5m', labels: { severity: 'warning', @@ -128,13 +130,13 @@ { alert: 'ThanosReceiveNoUpload', annotations: { - description: 'Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded latest data to object storage.', + description: 'Thanos Receive {{$labels.instance}}%shas not uploaded latest data to object storage.' % location, summary: 'Thanos Receive has not uploaded latest data to object storage.', }, expr: ||| (up{%(selector)s} - 1) - + on (instance) # filters to only alert on current instance last 3h - (sum by (instance) (increase(thanos_shipper_uploads_total{%(selector)s}[3h])) == 0) + + on (%(dimensions)s, instance) # filters to only alert on current instance last 3h + (sum by (%(dimensions)s, instance) (increase(thanos_shipper_uploads_total{%(selector)s}[3h])) == 0) ||| % thanos.receive, 'for': '3h', labels: { diff --git a/mixin/alerts/rule.libsonnet b/mixin/alerts/rule.libsonnet index d019a401994..69c3f14f482 100644 --- a/mixin/alerts/rule.libsonnet +++ b/mixin/alerts/rule.libsonnet @@ -6,20 +6,22 @@ rulerDnsErrorThreshold: 1, alertManagerDnsErrorThreshold: 1, evalErrorThreshold: 5, + dimensions: std.join(', ', std.objectFields(thanos.targetGroups) + ['job', 'instance']), }, prometheusAlerts+:: { groups+: if thanos.rule == null then [] else [ + local location = if std.length(std.objectFields(thanos.targetGroups)) > 0 then ' in ' + std.join('/', ['{{$labels.%s}}' % level for level in std.objectFields(thanos.targetGroups)]) else ' '; { name: 'thanos-rule', rules: [ { alert: 'ThanosRuleQueueIsDroppingAlerts', annotations: { - description: 'Thanos Rule {{$labels.job}} is failing to queue alerts.', + description: 'Thanos Rule 
{{$labels.instance}}%sis failing to queue alerts.' % location, summary: 'Thanos Rule is failing to queue alerts.', }, expr: ||| - sum by (job) (rate(thanos_alert_queue_alerts_dropped_total{%(selector)s}[5m])) > 0 + sum by (%(dimensions)s) (rate(thanos_alert_queue_alerts_dropped_total{%(selector)s}[5m])) > 0 ||| % thanos.rule, 'for': '5m', labels: { @@ -29,11 +31,11 @@ { alert: 'ThanosRuleSenderIsFailingAlerts', annotations: { - description: 'Thanos Rule {{$labels.job}} is failing to send alerts to alertmanager.', + description: 'Thanos Rule {{$labels.instance}}%sis failing to send alerts to alertmanager.' % location, summary: 'Thanos Rule is failing to send alerts to alertmanager.', }, expr: ||| - sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{%(selector)s}[5m])) > 0 + sum by (%(dimensions)s) (rate(thanos_alert_sender_alerts_dropped_total{%(selector)s}[5m])) > 0 ||| % thanos.rule, 'for': '5m', labels: { @@ -43,14 +45,14 @@ { alert: 'ThanosRuleHighRuleEvaluationFailures', annotations: { - description: 'Thanos Rule {{$labels.job}} is failing to evaluate rules.', + description: 'Thanos Rule {{$labels.instance}}%sis failing to evaluate rules.' % location, summary: 'Thanos Rule is failing to evaluate rules.', }, expr: ||| ( - sum by (job) (rate(prometheus_rule_evaluation_failures_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(prometheus_rule_evaluation_failures_total{%(selector)s}[5m])) / - sum by (job) (rate(prometheus_rule_evaluations_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(prometheus_rule_evaluations_total{%(selector)s}[5m])) * 100 > %(evalErrorThreshold)s ) ||| % thanos.rule, @@ -63,11 +65,11 @@ { alert: 'ThanosRuleHighRuleEvaluationWarnings', annotations: { - description: 'Thanos Rule {{$labels.job}} has high number of evaluation warnings.', + description: 'Thanos Rule {{$labels.instance}}%shas high number of evaluation warnings.' 
% location, summary: 'Thanos Rule has high number of evaluation warnings.', }, expr: ||| - sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{%(selector)s}[5m])) > 0 + sum by (%(dimensions)s) (rate(thanos_rule_evaluation_with_warnings_total{%(selector)s}[5m])) > 0 ||| % thanos.rule, 'for': '15m', @@ -78,14 +80,14 @@ { alert: 'ThanosRuleRuleEvaluationLatencyHigh', annotations: { - description: 'Thanos Rule {{$labels.job}}/{{$labels.instance}} has higher evaluation latency than interval for {{$labels.rule_group}}.', + description: 'Thanos Rule {{$labels.instance}}%shas higher evaluation latency than interval for {{$labels.rule_group}}.' % location, summary: 'Thanos Rule has high rule evaluation latency.', }, expr: ||| ( - sum by (job, instance, rule_group) (prometheus_rule_group_last_duration_seconds{%(selector)s}) + sum by (%(dimensions)s, rule_group) (prometheus_rule_group_last_duration_seconds{%(selector)s}) > - sum by (job, instance, rule_group) (prometheus_rule_group_interval_seconds{%(selector)s}) + sum by (%(dimensions)s, rule_group) (prometheus_rule_group_interval_seconds{%(selector)s}) ) ||| % thanos.rule, 'for': '5m', @@ -96,14 +98,14 @@ { alert: 'ThanosRuleGrpcErrorRate', annotations: { - description: 'Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.', + description: 'Thanos Rule {{$labels.job}}%sis failing to handle {{$value | humanize}}%% of requests.' 
% location, summary: 'Thanos Rule is failing to handle grpc requests.', }, expr: ||| ( - sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s}[5m])) + sum by (%(dimensions)s) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s}[5m])) / - sum by (job) (rate(grpc_server_started_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(grpc_server_started_total{%(selector)s}[5m])) * 100 > %(grpcErrorThreshold)s ) ||| % thanos.rule, @@ -115,10 +117,10 @@ { alert: 'ThanosRuleConfigReloadFailure', annotations: { - description: 'Thanos Rule {{$labels.job}} has not been able to reload its configuration.', + description: 'Thanos Rule {{$labels.job}}%shas not been able to reload its configuration.' % location, summary: 'Thanos Rule has not been able to reload configuration.', }, - expr: 'avg(thanos_rule_config_last_reload_successful{%(selector)s}) by (job) != 1' % thanos.rule, + expr: 'avg by (%(dimensions)s) (thanos_rule_config_last_reload_successful{%(selector)s}) != 1' % thanos.rule, 'for': '5m', labels: { severity: 'info', @@ -127,14 +129,14 @@ { alert: 'ThanosRuleQueryHighDNSFailures', annotations: { - description: 'Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for query endpoints.', + description: 'Thanos Rule {{$labels.job}}%shas {{$value | humanize}}%% of failing DNS queries for query endpoints.' 
% location, summary: 'Thanos Rule is having high number of DNS failures.', }, expr: ||| ( - sum by (job) (rate(thanos_rule_query_apis_dns_failures_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_rule_query_apis_dns_failures_total{%(selector)s}[5m])) / - sum by (job) (rate(thanos_rule_query_apis_dns_lookups_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_rule_query_apis_dns_lookups_total{%(selector)s}[5m])) * 100 > %(rulerDnsErrorThreshold)s ) ||| % thanos.rule, @@ -146,14 +148,14 @@ { alert: 'ThanosRuleAlertmanagerHighDNSFailures', annotations: { - description: 'Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for Alertmanager endpoints.', + description: 'Thanos Rule {{$labels.instance}}%shas {{$value | humanize}}%% of failing DNS queries for Alertmanager endpoints.' % location, summary: 'Thanos Rule is having high number of DNS failures.', }, expr: ||| ( - sum by (job) (rate(thanos_rule_alertmanagers_dns_failures_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_rule_alertmanagers_dns_failures_total{%(selector)s}[5m])) / - sum by (job) (rate(thanos_rule_alertmanagers_dns_lookups_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_rule_alertmanagers_dns_lookups_total{%(selector)s}[5m])) * 100 > %(alertManagerDnsErrorThreshold)s ) ||| % thanos.rule, @@ -166,13 +168,13 @@ // NOTE: This alert will give false positive if no rules are configured. alert: 'ThanosRuleNoEvaluationFor10Intervals', annotations: { - description: 'Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups that did not evaluate for at least 10x of their expected interval.', + description: 'Thanos Rule {{$labels.job}}%shas {{$value | humanize}}%% rule groups that did not evaluate for at least 10x of their expected interval.' 
% location, summary: 'Thanos Rule has rule groups that did not evaluate for 10 intervals.', }, expr: ||| - time() - max by (job, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{%(selector)s}) + time() - max by (%(dimensions)s, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{%(selector)s}) > - 10 * max by (job, group) (prometheus_rule_group_interval_seconds{%(selector)s}) + 10 * max by (%(dimensions)s, group) (prometheus_rule_group_interval_seconds{%(selector)s}) ||| % thanos.rule, 'for': '5m', labels: { @@ -183,15 +185,15 @@ { alert: 'ThanosNoRuleEvaluations', annotations: { - description: 'Thanos Rule {{$labels.job}} did not perform any rule evaluations in the past 2 minutes.', + description: 'Thanos Rule {{$labels.instance}}%sdid not perform any rule evaluations in the past 10 minutes.' % location, summary: 'Thanos Rule did not perform any rule evaluations.', }, expr: ||| - sum(rate(prometheus_rule_evaluations_total{%(selector)s}[2m])) <= 0 + sum by (%(dimensions)s) (rate(prometheus_rule_evaluations_total{%(selector)s}[5m])) <= 0 and - sum(thanos_rule_loaded_rules{%(selector)s}) > 0 + sum by (%(dimensions)s) (thanos_rule_loaded_rules{%(selector)s}) > 0 ||| % thanos.rule, - 'for': '3m', + 'for': '5m', labels: { severity: 'critical', }, diff --git a/mixin/alerts/sidecar.libsonnet b/mixin/alerts/sidecar.libsonnet index ee09c9aeb88..f46ba181f12 100644 --- a/mixin/alerts/sidecar.libsonnet +++ b/mixin/alerts/sidecar.libsonnet @@ -2,20 +2,22 @@ local thanos = self, sidecar+:: { selector: error 'must provide selector for Thanos Sidecar alerts', + dimensions: std.join(', ', std.objectFields(thanos.targetGroups) + ['job', 'instance']), }, prometheusAlerts+:: { groups+: if thanos.sidecar == null then [] else [ + local location = if std.length(std.objectFields(thanos.targetGroups)) > 0 then ' in ' + std.join('/', ['{{$labels.%s}}' % level for level in std.objectFields(thanos.targetGroups)]) else ' '; { name: 'thanos-sidecar', rules: [ { alert: 
'ThanosSidecarPrometheusDown', annotations: { - description: 'Thanos Sidecar {{$labels.job}} {{$labels.instance}} cannot connect to Prometheus.', + description: 'Thanos Sidecar {{$labels.instance}}%scannot connect to Prometheus.' % location, summary: 'Thanos Sidecar cannot connect to Prometheus', }, expr: ||| - sum by (job, instance) (thanos_sidecar_prometheus_up{%(selector)s} == 0) + thanos_sidecar_prometheus_up{%(selector)s} == 0 ||| % thanos.sidecar, 'for': '5m', labels: { @@ -25,11 +27,11 @@ { alert: 'ThanosSidecarBucketOperationsFailed', annotations: { - description: 'Thanos Sidecar {{$labels.job}} {{$labels.instance}} bucket operations are failing', + description: 'Thanos Sidecar {{$labels.instance}}%sbucket operations are failing' % location, summary: 'Thanos Sidecar bucket operations are failing', }, expr: ||| - rate(thanos_objstore_bucket_operation_failures_total{%(selector)s}[5m]) > 0 + sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operation_failures_total{%(selector)s}[5m])) > 0 ||| % thanos.sidecar, 'for': '5m', labels: { @@ -39,11 +41,11 @@ { alert: 'ThanosSidecarUnhealthy', annotations: { - description: 'Thanos Sidecar {{$labels.job}} {{$labels.instance}} is unhealthy for more than {{$value}} seconds.', + description: 'Thanos Sidecar {{$labels.instance}}%sis unhealthy for {{$value}} seconds.' 
% location, summary: 'Thanos Sidecar is unhealthy.', }, expr: ||| - time() - max by (job, instance) (timestamp(thanos_sidecar_last_heartbeat_success_time_seconds{%(selector)s})) >= 240 + time() - max by (%(dimensions)s) (thanos_sidecar_last_heartbeat_success_time_seconds{%(selector)s}) >= 600 ||| % thanos.sidecar, labels: { severity: 'critical', diff --git a/mixin/alerts/store.libsonnet b/mixin/alerts/store.libsonnet index 8fb25e9cb32..7ab8279da6b 100644 --- a/mixin/alerts/store.libsonnet +++ b/mixin/alerts/store.libsonnet @@ -7,23 +7,25 @@ seriesGateErrorThreshold: 2, bucketOpsErrorThreshold: 5, bucketOpsP99LatencyThreshold: 2, + dimensions: std.join(', ', std.objectFields(thanos.targetGroups) + ['job']), }, prometheusAlerts+:: { groups+: if thanos.store == null then [] else [ + local location = if std.length(std.objectFields(thanos.targetGroups)) > 0 then ' in ' + std.join('/', ['{{$labels.%s}}' % level for level in std.objectFields(thanos.targetGroups)]) else ' '; { name: 'thanos-store', rules: [ { alert: 'ThanosStoreGrpcErrorRate', annotations: { - description: 'Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.', + description: 'Thanos Store {{$labels.job}}%sis failing to handle {{$value | humanize}}%% of requests.' 
% location, summary: 'Thanos Store is failing to handle qrpcd requests.', }, expr: ||| ( - sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s}[5m])) + sum by (%(dimensions)s) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s}[5m])) / - sum by (job) (rate(grpc_server_started_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(grpc_server_started_total{%(selector)s}[5m])) * 100 > %(grpcErrorThreshold)s ) ||| % thanos.store, @@ -35,14 +37,14 @@ { alert: 'ThanosStoreSeriesGateLatencyHigh', annotations: { - description: 'Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for store series gate requests.', + description: 'Thanos Store {{$labels.job}}%shas a 99th percentile latency of {{$value}} seconds for store series gate requests.' % location, summary: 'Thanos Store has high latency for store series gate requests.', }, expr: ||| ( - histogram_quantile(0.99, sum by (job, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{%(selector)s}[5m]))) > %(seriesGateErrorThreshold)s + histogram_quantile(0.99, sum by (%(dimensions)s, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{%(selector)s}[5m]))) > %(seriesGateErrorThreshold)s and - sum by (job) (rate(thanos_bucket_store_series_gate_duration_seconds_count{%(selector)s}[5m])) > 0 + sum by (%(dimensions)s) (rate(thanos_bucket_store_series_gate_duration_seconds_count{%(selector)s}[5m])) > 0 ) ||| % thanos.store, 'for': '10m', @@ -53,14 +55,14 @@ { alert: 'ThanosStoreBucketHighOperationFailures', annotations: { - description: 'Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations.', + description: 'Thanos Store {{$labels.job}}%sBucket is failing to execute {{$value | humanize}}%% of operations.' 
% location, summary: 'Thanos Store Bucket is failing to execute operations.', }, expr: ||| ( - sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operation_failures_total{%(selector)s}[5m])) / - sum by (job) (rate(thanos_objstore_bucket_operations_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operations_total{%(selector)s}[5m])) * 100 > %(bucketOpsErrorThreshold)s ) ||| % thanos.store, @@ -72,14 +74,14 @@ { alert: 'ThanosStoreObjstoreOperationLatencyHigh', annotations: { - description: 'Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of {{ $value }} seconds for the bucket operations.', + description: 'Thanos Store {{$labels.job}}%sBucket has a 99th percentile latency of {{$value}} seconds for the bucket operations.' % location, summary: 'Thanos Store is having high latency for bucket operations.', }, expr: ||| ( - histogram_quantile(0.99, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{%(selector)s}[5m]))) > %(bucketOpsP99LatencyThreshold)s + histogram_quantile(0.99, sum by (%(dimensions)s, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{%(selector)s}[5m]))) > %(bucketOpsP99LatencyThreshold)s and - sum by (job) (rate(thanos_objstore_bucket_operation_duration_seconds_count{%(selector)s}[5m])) > 0 + sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operation_duration_seconds_count{%(selector)s}[5m])) > 0 ) ||| % thanos.store, 'for': '10m', diff --git a/mixin/config.libsonnet b/mixin/config.libsonnet index d0123e66128..b30a3b3cdbb 100644 --- a/mixin/config.libsonnet +++ b/mixin/config.libsonnet @@ -1,45 +1,68 @@ { + local thanos = self, + // TargetGroups is a way to help mixin users to add high level target grouping to their alerts and dashboards. + // With the help of TargetGroups you can use a single observability stack to monitor several Thanos instances. 
+ // The key in the key-value pair will be used as "label name" in the alerts and variable name in the dashboards. + // The value in the key-value pair will be used as a query to fetch available values for the given label name. + targetGroups+:: { + // For example for given following groups, + // namespace: 'thanos_status', + // cluster: 'find_mi_cluster_bitte', + // zone: 'an_i_in_da_zone', + // region: 'losing_my_region', + // will generate queriers for the alerts as follows: + // ( + // sum by (cluster, namespace, region, zone, job) (rate(thanos_compact_group_compactions_failures_total{job=~"thanos-compact.*"}[5m])) + // / + // sum by (cluster, namespace, region, zone, job) (rate(thanos_compact_group_compactions_total{job=~"thanos-compact.*"}[5m])) + // * 100 > 5 + // ) + // + // AND for the dashborads: + // + // sum by (cluster, namespace, region, zone, job) (rate(thanos_compact_group_compactions_failures_total{cluster=\"$cluster\", namespace=\"$namespace\", region=\"$region\", zone=\"$zone\", job=\"$job\"}[$interval])) + // / + // sum by (cluster, namespace, region, zone, job) (rate(thanos_compact_group_compactions_total{cluster=\"$cluster\", namespace=\"$namespace\", region=\"$region\", zone=\"$zone\", job=\"$job\"}[$interval])) + }, query+:: { - jobPrefix: 'thanos-query', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-query.*"', title: '%(prefix)sQuery' % $.dashboard.prefix, }, store+:: { - jobPrefix: 'thanos-store', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-store.*"', title: '%(prefix)sStore' % $.dashboard.prefix, }, receive+:: { - jobPrefix: 'thanos-receive', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-receive.*"', title: '%(prefix)sReceive' % $.dashboard.prefix, }, rule+:: { - jobPrefix: 'thanos-rule', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-rule.*"', title: '%(prefix)sRule' % $.dashboard.prefix, }, compact+:: { - jobPrefix: 'thanos-compact', - 
selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-compact.*"', title: '%(prefix)sCompact' % $.dashboard.prefix, }, sidecar+:: { - jobPrefix: 'thanos-sidecar', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-sidecar.*"', title: '%(prefix)sSidecar' % $.dashboard.prefix, }, + // TODO(kakkoyun): Fix naming convention: bucketReplicate bucket_replicate+:: { - jobPrefix: 'thanos-bucket-replicate', - selector: 'job=~"%s.*"' % self.jobPrefix, + selector: 'job=~"thanos-bucket-replicate.*"', title: '%(prefix)sBucketReplicate' % $.dashboard.prefix, }, - overview+:: { - title: '%(prefix)sOverview' % $.dashboard.prefix, - }, dashboard+:: { prefix: 'Thanos / ', tags: ['thanos-mixin'], - namespaceQuery: 'thanos_status', + selector: ['%s="$%s"' % [level, level] for level in std.objectFields(thanos.targetGroups)], + dimensions: ['%s' % level for level in std.objectFields(thanos.targetGroups)], + + overview+:: { + title: '%(prefix)sOverview' % $.dashboard.prefix, + selector: std.join(', ', thanos.dashboard.selector), + dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']), + }, }, } diff --git a/mixin/dashboards.jsonnet b/mixin/dashboards.jsonnet index 94353a6c150..4e4c5e4e873 100644 --- a/mixin/dashboards.jsonnet +++ b/mixin/dashboards.jsonnet @@ -1,7 +1,6 @@ -local dashboards = - ( - import 'mixin.libsonnet' - ).grafanaDashboards; +local dashboards = ( + import 'mixin.libsonnet' +).grafanaDashboards; { [name]: dashboards[name] diff --git a/mixin/dashboards/bucket_replicate.libsonnet b/mixin/dashboards/bucket_replicate.libsonnet index 882ef35d5a7..82ae53380ee 100644 --- a/mixin/dashboards/bucket_replicate.libsonnet +++ b/mixin/dashboards/bucket_replicate.libsonnet @@ -3,9 +3,12 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; { local thanos = self, bucket_replicate+:: { - jobPrefix: error 'must provide job prefix for Thanos Bucket Replicate dashboard', selector: error 'must provide selector for Thanos Bucket 
Replicate dashboard', title: error 'must provide title for Thanos Bucket Replicate dashboard', + dashboard:: { + selector: std.join(', ', thanos.dashboard.selector + ['job="$job"']), + dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']), + }, }, grafanaDashboards+:: { [if thanos.bucket_replicate != null then 'bucket_replicate.json']: @@ -15,14 +18,15 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Rate') + g.qpsErrTotalPanel( - 'thanos_replicate_replication_runs_total{result="error", namespace="$namespace",%(selector)s}' % thanos.bucket_replicate, - 'thanos_replicate_replication_runs_total{namespace="$namespace",%(selector)s}' % thanos.bucket_replicate, + 'thanos_replicate_replication_runs_total{result="error", %s}' % thanos.bucket_replicate.dashboard.selector, + 'thanos_replicate_replication_runs_total{%s}' % thanos.bucket_replicate.dashboard.selector, + thanos.rule.dashboard.dimensions ) ) .addPanel( g.panel('Errors', 'Shows rate of errors.') + g.queryPanel( - 'sum(rate(thanos_replicate_replication_runs_total{result="error", namespace="$namespace",%(selector)s}[$interval])) by (result)' % thanos.bucket_replicate, + 'sum by (%(dimensions)s, result) (rate(thanos_replicate_replication_runs_total{result="error", %(selector)s}[$interval]))' % thanos.bucket_replicate.dashboard, '{{result}}' ) + { yaxes: g.yaxes('percentunit') } + @@ -30,7 +34,11 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; ) .addPanel( g.panel('Duration', 'Shows how long has it taken to run a replication cycle.') + - g.latencyPanel('thanos_replicate_replication_run_duration_seconds', 'result="success", namespace="$namespace",%(selector)s' % thanos.bucket_replicate) + g.latencyPanel( + 'thanos_replicate_replication_run_duration_seconds', + 'result="success", %s' % thanos.bucket_replicate.dashboard.selector, + thanos.rule.dashboard.dimensions + ) ) ) .addRow( @@ -39,18 +47,15 @@ local g = import 
'../lib/thanos-grafana-builder/builder.libsonnet'; g.panel('Metrics') + g.queryPanel( [ - 'sum(rate(blocks_meta_synced{state="loaded",namespace="$namespace",%(selector)s}[$interval]))' % thanos.bucket_replicate, - 'sum(rate(blocks_meta_synced{state="failed",namespace="$namespace",%(selector)s}[$interval]))' % thanos.bucket_replicate, - 'sum(rate(thanos_replicate_blocks_already_replicated_total{namespace="$namespace",%(selector)s}[$interval]))' % thanos.bucket_replicate, - 'sum(rate(thanos_replicate_blocks_replicated_total{namespace="$namespace",%(selector)s}[$interval]))' % thanos.bucket_replicate, - 'sum(rate(thanos_replicate_objects_replicated_total{namespace="$namespace",%(selector)s}[$interval]))' % thanos.bucket_replicate, + 'sum by (%(dimensions)s) (rate(blocks_meta_synced{state="loaded", %(selector)s}[$interval]))' % thanos.bucket_replicate.dashboard, + 'sum by (%(dimensions)s) (rate(blocks_meta_synced{state="failed", %(selector)s}[$interval]))' % thanos.bucket_replicate.dashboard, + 'sum by (%(dimensions)s) (rate(thanos_replicate_blocks_already_replicated_total{%(selector)s}[$interval]))' % thanos.bucket_replicate.dashboard, + 'sum by (%(dimensions)s) (rate(thanos_replicate_blocks_replicated_total{%(selector)s}[$interval]))' % thanos.bucket_replicate.dashboard, + 'sum by (%(dimensions)s) (rate(thanos_replicate_objects_replicated_total{%(selector)s}[$interval]))' % thanos.bucket_replicate.dashboard, ], ['meta loads', 'partial meta reads', 'already replicated blocks', 'replicated blocks', 'replicated objects'] ) ) - ) - + - g.template('namespace', thanos.dashboard.namespaceQuery) + - g.template('job', 'up', 'namespace="$namespace", %(selector)s' % thanos.bucket_replicate, true, '%(jobPrefix)s.*' % thanos.bucket_replicate), + ), }, } diff --git a/mixin/dashboards/compact.libsonnet b/mixin/dashboards/compact.libsonnet index 74392331823..b5db2516137 100644 --- a/mixin/dashboards/compact.libsonnet +++ b/mixin/dashboards/compact.libsonnet @@ -1,11 +1,15 @@ local g 
= import '../lib/thanos-grafana-builder/builder.libsonnet'; +local utils = import '../lib/utils.libsonnet'; { local thanos = self, compact+:: { - jobPrefix: error 'must provide job prefix for Thanos Compact dashboard', selector: error 'must provide selector for Thanos Compact dashboard', title: error 'must provide title for Thanos Compact dashboard', + dashboard:: { + selector: std.join(', ', thanos.dashboard.selector + ['job="$job"']), + dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']), + }, }, grafanaDashboards+:: { [if thanos.compact != null then 'compact.json']: @@ -18,7 +22,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; 'Shows rate of execution for compactions against blocks that are stored in the bucket by compaction group.' ) + g.queryPanel( - 'sum(rate(thanos_compact_group_compactions_total{namespace="$namespace",job=~"$job"}[$interval])) by (job, group)', + 'sum by (%(dimensions)s, group) (rate(thanos_compact_group_compactions_total{%(selector)s}[$interval]))' % thanos.compact.dashboard, 'compaction {{job}} {{group}}' ) + g.stack @@ -29,8 +33,9 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; 'Shows ratio of errors compared to the total number of executed compactions against blocks that are stored in the bucket.' ) + g.qpsErrTotalPanel( - 'thanos_compact_group_compactions_failures_total{namespace="$namespace",job=~"$job"}', - 'thanos_compact_group_compactions_total{namespace="$namespace",job=~"$job"}', + 'thanos_compact_group_compactions_failures_total{%(selector)s}' % thanos.compact.dashboard.selector, + 'thanos_compact_group_compactions_total{%(selector)s}' % thanos.compact.dashboard.selector, + thanos.rule.dashboard.dimensions ) ) ) @@ -42,7 +47,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; 'Shows rate of execution for downsampling against blocks that are stored in the bucket by compaction group.' 
) + g.queryPanel( - 'sum(rate(thanos_compact_downsample_total{namespace="$namespace",job=~"$job"}[$interval])) by (job, group)', + 'sum by (%(dimensions)s, group) (rate(thanos_compact_downsample_total{%(selector)s}[$interval]))' % thanos.compact.dashboard, 'downsample {{job}} {{group}}' ) + g.stack @@ -50,8 +55,9 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of executed downsampling against blocks that are stored in the bucket.') + g.qpsErrTotalPanel( - 'thanos_compact_downsample_failed_total{namespace="$namespace",job=~"$job"}', - 'thanos_compact_downsample_total{namespace="$namespace",job=~"$job"}', + 'thanos_compact_downsample_failed_total{%(selector)s}' % thanos.compact.dashboard.selector, + 'thanos_compact_downsample_total{%(selector)s}' % thanos.compact.dashboard.selector, + thanos.rule.dashboard.dimensions ) ) ) @@ -63,7 +69,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; 'Shows rate of execution for removals of blocks if their data is available as part of a block with a higher compaction level.' 
) + g.queryPanel( - 'sum(rate(thanos_compact_garbage_collection_total{namespace="$namespace",job=~"$job"}[$interval])) by (job)', + 'sum by (%(dimensions)s) (rate(thanos_compact_garbage_collection_total{%(selector)s}[$interval]))' % thanos.compact.dashboard, 'garbage collection {{job}}' ) + g.stack @@ -71,13 +77,14 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of executed garbage collections.') + g.qpsErrTotalPanel( - 'thanos_compact_garbage_collection_failures_total{namespace="$namespace",job=~"$job"}', - 'thanos_compact_garbage_collection_total{namespace="$namespace",job=~"$job"}', + 'thanos_compact_garbage_collection_failures_total{%(selector)s}' % thanos.compact.dashboard.selector, + 'thanos_compact_garbage_collection_total{%(selector)s}' % thanos.compact.dashboard.selector, + thanos.rule.dashboard.dimensions ) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to execute garbage collection in quantiles.') + - g.latencyPanel('thanos_compact_garbage_collection_duration_seconds', 'namespace="$namespace",job=~"$job"') + g.latencyPanel('thanos_compact_garbage_collection_duration_seconds', thanos.rule.dashboard.selector, thanos.rule.dashboard.dimensions) ) ) .addRow( @@ -88,7 +95,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; 'Shows deletion rate of blocks already marked for deletion.' ) + g.queryPanel( - 'sum(rate(thanos_compact_blocks_cleaned_total{namespace="$namespace",job=~"$job"}[$interval])) by (job)', + 'sum by (%(dimensions)s) (rate(thanos_compact_blocks_cleaned_total{%(selector)s}[$interval]))' % thanos.compact.dashboard, 'Blocks cleanup {{job}}' ) + g.stack @@ -99,7 +106,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; 'Shows deletion failures rate of blocks already marked for deletion.' 
) + g.queryPanel( - 'sum(rate(thanos_compact_block_cleanup_failures_total{namespace="$namespace",job=~"$job"}[$interval])) by (job)', + 'sum by (%(dimensions)s) (rate(thanos_compact_block_cleanup_failures_total{%(selector)s}[$interval]))' % thanos.compact.dashboard, 'Blocks cleanup failures {{job}}' ) ) @@ -109,7 +116,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; 'Shows rate at which blocks are marked for deletion (from GC and retention policy).' ) + g.queryPanel( - 'sum(rate(thanos_compact_blocks_marked_for_deletion_total{namespace="$namespace",job=~"$job"}[$interval])) by (job)', + 'sum by (%(dimensions)s) (rate(thanos_compact_blocks_marked_for_deletion_total{%(selector)s}[$interval]))' % thanos.compact.dashboard, 'Blocks marked {{job}}' ) ) @@ -122,7 +129,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; 'Shows rate of execution for all meta files from blocks in the bucket into the memory.' ) + g.queryPanel( - 'sum(rate(thanos_blocks_meta_syncs_total{namespace="$namespace",job=~"$job"}[$interval])) by (job)', + 'sum by (%(dimensions)s) (rate(thanos_blocks_meta_syncs_total{%(selector)s}[$interval]))' % thanos.compact.dashboard, 'sync {{job}}' ) + g.stack @@ -130,13 +137,14 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of executed meta file sync.') + g.qpsErrTotalPanel( - 'thanos_blocks_meta_sync_failures_total{namespace="$namespace",job=~"$job"}', - 'thanos_blocks_meta_syncs_total{namespace="$namespace",job=~"$job"}', + 'thanos_blocks_meta_sync_failures_total{%(selector)s}' % thanos.compact.dashboard.selector, + 'thanos_blocks_meta_syncs_total{%(selector)s}' % thanos.compact.dashboard.selector, + thanos.rule.dashboard.dimensions ) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to execute meta file sync, in quantiles.') + - g.latencyPanel('thanos_blocks_meta_sync_duration_seconds', 
'namespace="$namespace",job=~"$job"') + g.latencyPanel('thanos_blocks_meta_sync_duration_seconds', thanos.compact.dashboard.selector, thanos.compact.dashboard.dimensions) ) ) .addRow( @@ -144,7 +152,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Rate', 'Shows rate of execution for operations against the bucket.') + g.queryPanel( - 'sum(rate(thanos_objstore_bucket_operations_total{namespace="$namespace",job=~"$job"}[$interval])) by (job, operation)', + 'sum by (%(dimensions)s, operation) (rate(thanos_objstore_bucket_operations_total{%(selector)s}[$interval]))' % thanos.compact.dashboard, '{{job}} {{operation}}' ) + g.stack @@ -152,20 +160,19 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of executed operations against the bucket.') + g.qpsErrTotalPanel( - 'thanos_objstore_bucket_operation_failures_total{namespace="$namespace",job=~"$job"}', - 'thanos_objstore_bucket_operations_total{namespace="$namespace",job=~"$job"}', + 'thanos_objstore_bucket_operation_failures_total{%s}' % thanos.compact.dashboard.selector, + 'thanos_objstore_bucket_operations_total{%s}' % thanos.compact.dashboard.selector, + thanos.compact.dashboard.dimensions ) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to execute operations against the bucket, in quantiles.') + - g.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', 'namespace="$namespace",job=~"$job"') + g.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', thanos.compact.dashboard.selector, thanos.compact.dashboard.dimensions) ) ) .addRow( - g.resourceUtilizationRow() - ) + - g.template('namespace', thanos.dashboard.namespaceQuery) + - g.template('job', 'up', 'namespace="$namespace", %(selector)s' % thanos.compact, true, '%(jobPrefix)s.*' % thanos.compact), + g.resourceUtilizationRow(thanos.compact.dashboard.selector, 
thanos.compact.dashboard.dimensions) + ), __overviewRows__+:: [ g.row('Compact') @@ -175,7 +182,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; 'Shows rate of execution for compactions against blocks that are stored in the bucket by compaction group.' ) + g.queryPanel( - 'sum(rate(thanos_compact_group_compactions_total{namespace="$namespace",%(selector)s}[$interval])) by (job)' % thanos.compact, + 'sum by (%(dimensions)s) (rate(thanos_compact_group_compactions_total{%(selector)s}[$interval]))' % thanos.dashboard.overview, 'compaction {{job}}' ) + g.stack + @@ -187,8 +194,9 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; 'Shows ratio of errors compared to the total number of executed compactions against blocks that are stored in the bucket.' ) + g.qpsErrTotalPanel( - 'thanos_compact_group_compactions_failures_total{namespace="$namespace",%(selector)s}' % thanos.compact, - 'thanos_compact_group_compactions_total{namespace="$namespace",%(selector)s}' % thanos.compact, + 'thanos_compact_group_compactions_failures_total{%s}' % thanos.dashboard.overview.selector, + 'thanos_compact_group_compactions_total{%s}' % thanos.dashboard.overview.selector, + thanos.dashboard.overview.dimensions ) + g.addDashboardLink(thanos.compact.title) ) + diff --git a/mixin/dashboards/defaults.libsonnet b/mixin/dashboards/defaults.libsonnet index 0c6d59b6f38..c2fccf24182 100644 --- a/mixin/dashboards/defaults.libsonnet +++ b/mixin/dashboards/defaults.libsonnet @@ -7,12 +7,12 @@ dashboard:: { prefix: 'Thanos / ', tags: error 'must provide dashboard tags', - namespaceQuery: error 'must provide a query for namespace variable for dashboard template', }, // Automatically add a uid to each dashboard based on the base64 encoding // of the file name and set the timezone to be 'default'. 
grafanaDashboards:: { + local component = std.split(filename, '.')[0], [filename]: grafanaDashboards[filename] { uid: std.md5(filename), timezone: 'UTC', @@ -43,7 +43,39 @@ ), ], }, - } + } { + templating+: { + list+: [ + template.new( + level, + '$datasource', + 'label_values(%s, %s)' % [thanos.targetGroups[level], level], + label=level, + refresh=1, + sort=2, + ) + for level in std.objectFields(thanos.targetGroups) + ], + }, + } + if std.objectHas(thanos[component], 'selector') then { + templating+: { + local name = 'job', + local selector = std.join(', ', thanos.dashboard.selector + [thanos[component].selector]), + list+: [ + template.new( + name, + '$datasource', + 'label_values(up{%s}, %s)' % [selector, name], + label=name, + refresh=1, + sort=2, + current='all', + allValues=null, + includeAll=true + ), + ], + }, + } else {} for filename in std.objectFields(grafanaDashboards) }, } diff --git a/mixin/dashboards/overview.libsonnet b/mixin/dashboards/overview.libsonnet index a0b1082a9d2..e27736db320 100644 --- a/mixin/dashboards/overview.libsonnet +++ b/mixin/dashboards/overview.libsonnet @@ -7,15 +7,13 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; }, grafanaDashboards+:: { 'overview.json': - g.dashboard(thanos.overview.title) + - g.template('namespace', thanos.dashboard.namespaceQuery), + g.dashboard(thanos.dashboard.overview.title), }, } + { local grafanaDashboards = super.grafanaDashboards, grafanaDashboards+:: { 'overview.json'+: { - __enumeratedRows__+:: std.foldl( function(acc, row) local n = std.length(row.panels); diff --git a/mixin/dashboards/query.libsonnet b/mixin/dashboards/query.libsonnet index 529bd3d7f29..7a827298115 100644 --- a/mixin/dashboards/query.libsonnet +++ b/mixin/dashboards/query.libsonnet @@ -1,73 +1,81 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; +local utils = import '../lib/utils.libsonnet'; { local thanos = self, query+:: { - jobPrefix: error 'must provide job prefix for Thanos 
Query dashboard', selector: error 'must provide selector for Thanos Query dashboard', title: error 'must provide title for Thanos Query dashboard', + dashboard:: { + selector: std.join(', ', thanos.dashboard.selector + ['job="$job"']), + dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']), + }, }, grafanaDashboards+:: { [if thanos.query != null then 'query.json']: + local queryHandlerSelector = utils.joinLabels([thanos.query.dashboard.selector, 'handler="query"']); + local queryRangeHandlerSelector = utils.joinLabels([thanos.query.dashboard.selector, 'handler="query_range"']); + local grpcUnarySelector = utils.joinLabels([thanos.query.dashboard.selector, 'grpc_type="unary"']); + local grpcServerStreamSelector = utils.joinLabels([thanos.query.dashboard.selector, 'grpc_type="server_stream"']); g.dashboard(thanos.query.title) .addRow( g.row('Instant Query API') .addPanel( g.panel('Rate', 'Shows rate of requests against /query for the given time.') + - g.httpQpsPanel('http_requests_total', 'namespace="$namespace",job=~"$job",handler="query"') + g.httpQpsPanel('http_requests_total', queryHandlerSelector, thanos.query.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the the total number of handled requests against /query.') + - g.httpErrPanel('http_requests_total', 'namespace="$namespace",job=~"$job",handler="query"') + g.httpErrPanel('http_requests_total', queryHandlerSelector, thanos.query.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests in quantiles.') + - g.latencyPanel('http_request_duration_seconds', 'namespace="$namespace",job=~"$job",handler="query"') + g.latencyPanel('http_request_duration_seconds', queryHandlerSelector, thanos.query.dashboard.dimensions) ) ) .addRow( g.row('Range Query API') .addPanel( g.panel('Rate', 'Shows rate of requests against /query_range for the given time range.') + - g.httpQpsPanel('http_requests_total', 
'namespace="$namespace",job=~"$job",handler="query_range"') + g.httpQpsPanel('http_requests_total', queryRangeHandlerSelector, thanos.query.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the the total number of handled requests against /query_range.') + - g.httpErrPanel('http_requests_total', 'namespace="$namespace",job=~"$job",handler="query_range"') + g.httpErrPanel('http_requests_total', queryRangeHandlerSelector, thanos.query.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests in quantiles.') + - g.latencyPanel('http_request_duration_seconds', 'namespace="$namespace",job=~"$job",handler="query_range"') + g.latencyPanel('http_request_duration_seconds', queryRangeHandlerSelector, thanos.query.dashboard.dimensions) ) ) .addRow( g.row('gRPC (Unary)') .addPanel( g.panel('Rate', 'Shows rate of handled Unary gRPC requests from other queriers.') + - g.grpcQpsPanel('client', 'namespace="$namespace",job=~"$job",grpc_type="unary"') + g.grpcRequestsPanel('grpc_client_handled_total', grpcUnarySelector, thanos.query.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the the total number of handled requests from other queriers.') + - g.grpcErrorsPanel('client', 'namespace="$namespace",job=~"$job",grpc_type="unary"') + g.grpcErrorsPanel('grpc_client_handled_total', grpcUnarySelector, thanos.query.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests from other queriers, in quantiles.') + - g.grpcLatencyPanel('client', 'namespace="$namespace",job=~"$job",grpc_type="unary"') + g.latencyPanel('grpc_client_handling_seconds', grpcUnarySelector, thanos.query.dashboard.dimensions) ) ) .addRow( g.row('gRPC (Stream)') .addPanel( g.panel('Rate', 'Shows rate of handled Streamed gRPC requests from other queriers.') + - g.grpcQpsPanel('client', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + 
g.grpcRequestsPanel('grpc_client_handled_total', grpcServerStreamSelector, thanos.query.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the the total number of handled requests from other queriers.') + - g.grpcErrorsPanel('client', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.grpcErrorsPanel('grpc_client_handled_total', grpcServerStreamSelector, thanos.query.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests from other queriers, in quantiles') + - g.grpcLatencyPanel('client', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.latencyPanel('grpc_client_handling_seconds', grpcServerStreamSelector, thanos.query.dashboard.dimensions) ) ) .addRow( @@ -75,41 +83,41 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Rate', 'Shows rate of DNS lookups to discover stores.') + g.queryPanel( - 'sum(rate(thanos_query_store_apis_dns_lookups_total{namespace="$namespace",job=~"$job"}[$interval])) by (job)', + 'sum by (%s) (rate(thanos_query_store_apis_dns_lookups_total{%s}[$interval]))' % [thanos.query.dashboard.dimensions, thanos.query.dashboard.selector], 'lookups {{job}}' ) ) .addPanel( g.panel('Errors', 'Shows ratio of failures compared to the the total number of executed DNS lookups.') + g.qpsErrTotalPanel( - 'thanos_query_store_apis_dns_failures_total{namespace="$namespace",job=~"$job"}', - 'thanos_query_store_apis_dns_lookups_total{namespace="$namespace",job=~"$job"}', + 'thanos_query_store_apis_dns_failures_total{%s}' % thanos.query.dashboard.selector, + 'thanos_query_store_apis_dns_lookups_total{%s}' % thanos.query.dashboard.selector, + thanos.query.dashboard.dimensions ) ) ) .addRow( - g.resourceUtilizationRow() - ) + - g.template('namespace', thanos.dashboard.namespaceQuery) + - g.template('job', 'up', 'namespace="$namespace", %(selector)s' % thanos.query, true, '%(jobPrefix)s.*' % thanos.query), + 
g.resourceUtilizationRow(thanos.query.dashboard.selector, thanos.query.dashboard.dimensions) + ), __overviewRows__+:: [ g.row('Instant Query') .addPanel( g.panel('Requests Rate', 'Shows rate of requests against /query for the given time.') + - g.httpQpsPanel('http_requests_total', 'namespace="$namespace",%(selector)s,handler="query"' % thanos.query) + + g.httpQpsPanel('http_requests_total', utils.joinLabels([thanos.dashboard.overview.selector, 'handler="query"']), thanos.dashboard.overview.dimensions) + g.addDashboardLink(thanos.query.title) ) .addPanel( g.panel('Requests Errors', 'Shows ratio of errors compared to the the total number of handled requests against /query.') + - g.httpErrPanel('http_requests_total', 'namespace="$namespace",%(selector)s,handler="query"' % thanos.query) + + g.httpErrPanel('http_requests_total', utils.joinLabels([thanos.dashboard.overview.selector, 'handler="query"']), thanos.dashboard.overview.dimensions) + g.addDashboardLink(thanos.query.title) ) .addPanel( g.sloLatency( 'Latency 99th Percentile', 'Shows how long has it taken to handle requests.', - 'http_request_duration_seconds_bucket{namespace="$namespace",%(selector)s,handler="query"}' % thanos.query, + 'http_request_duration_seconds_bucket{%s}' % utils.joinLabels([thanos.dashboard.overview.selector, 'handler="query"']), + thanos.dashboard.overview.dimensions, 0.99, 0.5, 1 @@ -120,19 +128,20 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; g.row('Range Query') .addPanel( g.panel('Requests Rate', 'Shows rate of requests against /query_range for the given time range.') + - g.httpQpsPanel('http_requests_total', 'namespace="$namespace",%(selector)s,handler="query_range"' % thanos.query) + + g.httpQpsPanel('http_requests_total', utils.joinLabels([thanos.dashboard.overview.selector, 'handler="query_range"']), thanos.dashboard.overview.dimensions) + g.addDashboardLink(thanos.query.title) ) .addPanel( g.panel('Requests Errors', 'Shows ratio of errors compared to the 
the total number of handled requests against /query_range.') + - g.httpErrPanel('http_requests_total', 'namespace="$namespace",%(selector)s,handler="query_range"' % thanos.query) + + g.httpErrPanel('http_requests_total', utils.joinLabels([thanos.dashboard.overview.selector, 'handler="query_range"']), thanos.dashboard.overview.dimensions) + g.addDashboardLink(thanos.query.title) ) .addPanel( g.sloLatency( 'Latency 99th Percentile', 'Shows how long has it taken to handle requests.', - 'http_request_duration_seconds_bucket{namespace="$namespace",%(selector)s,handler="query_range"}' % thanos.query, + 'http_request_duration_seconds_bucket{%s}' % utils.joinLabels([thanos.dashboard.overview.selector, 'handler="query_range"']), + thanos.dashboard.overview.dimensions, 0.99, 0.5, 1 diff --git a/mixin/dashboards/receive.libsonnet b/mixin/dashboards/receive.libsonnet index 4ff3317d95d..bb23250fcbc 100644 --- a/mixin/dashboards/receive.libsonnet +++ b/mixin/dashboards/receive.libsonnet @@ -1,28 +1,36 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; +local utils = import '../lib/utils.libsonnet'; { local thanos = self, receive+:: { - jobPrefix: error 'must provide job prefix for Thanos Receive dashboard', selector: error 'must provide selector for Thanos Receive dashboard', title: error 'must provide title for Thanos Receive dashboard', + dashboard:: { + selector: std.join(', ', thanos.dashboard.selector + ['job="$job"']), + dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']), + }, }, grafanaDashboards+:: { [if thanos.receive != null then 'receive.json']: + local receiveHandlerSelector = utils.joinLabels([thanos.receive.dashboard.selector, 'handler="receive"']); + local grpcUnaryWriteSelector = utils.joinLabels([thanos.receive.dashboard.selector, 'grpc_type="unary"', 'grpc_method="RemoteWrite"']); + local grpcUnaryReadSelector = utils.joinLabels([thanos.receive.dashboard.selector, 'grpc_type="unary"', 'grpc_method!="RemoteWrite"']); + local 
grpcServerStreamSelector = utils.joinLabels([thanos.receive.dashboard.selector, 'grpc_type="server_stream"']); g.dashboard(thanos.receive.title) .addRow( g.row('WRITE - Incoming Request') .addPanel( g.panel('Rate', 'Shows rate of incoming requests.') + - g.httpQpsPanel('http_requests_total', 'handler="receive",namespace="$namespace",job=~"$job"') + g.httpQpsPanel('http_requests_total', receiveHandlerSelector, thanos.receive.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of handled incoming requests.') + - g.httpErrPanel('http_requests_total', 'handler="receive",namespace="$namespace",job=~"$job"') + g.httpErrPanel('http_requests_total', receiveHandlerSelector, thanos.receive.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle incoming requests in quantiles.') + - g.latencyPanel('http_request_duration_seconds', 'handler="receive",namespace="$namespace",job=~"$job"') + g.latencyPanel('http_request_duration_seconds', receiveHandlerSelector, thanos.receive.dashboard.dimensions) ) ) .addRow( @@ -30,15 +38,16 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Rate', 'Shows rate of replications to other receive nodes.') + g.queryPanel( - 'sum(rate(thanos_receive_replications_total{namespace="$namespace",job=~"$job"}[$interval])) by (job)', + 'sum by (%s) (rate(thanos_receive_replications_total{%s}[$interval]))' % [thanos.receive.dashboard.dimensions, thanos.receive.dashboard.selector], 'all {{job}}', ) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of replications to other receive nodes.') + g.qpsErrTotalPanel( - 'thanos_receive_replications_total{namespace="$namespace",job=~"$job",result="error"}', - 'thanos_receive_replications_total{namespace="$namespace",job=~"$job"}', + 'thanos_receive_replications_total{%s}' % utils.joinLabels([thanos.receive.dashboard.selector, 'result="error"']), + 
'thanos_receive_replications_total{%s}' % thanos.receive.dashboard.selector, + thanos.receive.dashboard.dimensions ) ) ) @@ -47,61 +56,65 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Rate', 'Shows rate of forwarded requests to other receive nodes.') + g.queryPanel( - 'sum(rate(thanos_receive_forward_requests_total{namespace="$namespace",job=~"$job"}[$interval])) by (job)', + 'sum by (%s) (rate(thanos_receive_forward_requests_total{%s}[$interval]))' % [thanos.receive.dashboard.dimensions, thanos.receive.dashboard.selector], 'all {{job}}', ) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of forwareded requests to other receive nodes.') + g.qpsErrTotalPanel( - 'thanos_receive_forward_requests_total{namespace="$namespace",job=~"$job",result="error"}', - 'thanos_receive_forward_requests_total{namespace="$namespace",job=~"$job"}', + 'thanos_receive_forward_requests_total{%s}' % utils.joinLabels([thanos.receive.dashboard.selector, 'result="error"']), + 'thanos_receive_forward_requests_total{%s}' % thanos.receive.dashboard.selector, + thanos.receive.dashboard.dimensions ) ) ) .addRow( + // TODO(https://github.com/thanos-io/thanos/issues/3926) g.row('WRITE - gRPC (Unary)') .addPanel( g.panel('Rate', 'Shows rate of handled Unary gRPC requests from queriers.') + - g.grpcQpsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary",grpc_method="RemoteWrite"') + g.grpcRequestsPanel('grpc_server_handled_total', grpcUnaryWriteSelector, thanos.receive.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests from queriers.') + - g.grpcErrorsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary",grpc_method="RemoteWrite"') + g.grpcErrorsPanel('grpc_server_handled_total', grpcUnaryWriteSelector, thanos.receive.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle 
requests from queriers, in quantiles.') + - g.grpcLatencyPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary",grpc_method="RemoteWrite"') + g.latencyPanel('grpc_server_handling_seconds', grpcUnaryWriteSelector, thanos.receive.dashboard.dimensions) ) ) .addRow( + // TODO(https://github.com/thanos-io/thanos/issues/3926) g.row('READ - gRPC (Unary)') .addPanel( g.panel('Rate', 'Shows rate of handled Unary gRPC requests from queriers.') + - g.grpcQpsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary",grpc_method!="RemoteWrite"') + g.grpcRequestsPanel('grpc_server_handled_total', grpcUnaryReadSelector, thanos.receive.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests from queriers.') + - g.grpcErrorsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary",grpc_method!="RemoteWrite"') + g.grpcErrorsPanel('grpc_server_handled_total', grpcUnaryReadSelector, thanos.receive.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests from queriers, in quantiles.') + - g.grpcLatencyPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary",grpc_method!="RemoteWrite"') + g.latencyPanel('grpc_server_handling_seconds', grpcUnaryReadSelector, thanos.receive.dashboard.dimensions) ) ) .addRow( + // TODO(https://github.com/thanos-io/thanos/issues/3926) g.row('READ - gRPC (Stream)') .addPanel( g.panel('Rate', 'Shows rate of handled Streamed gRPC requests from queriers.') + - g.grpcQpsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.grpcRequestsPanel('grpc_server_handled_total', grpcServerStreamSelector, thanos.receive.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests from queriers.') + - g.grpcErrorsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + 
g.grpcErrorsPanel('grpc_server_handled_total', grpcServerStreamSelector, thanos.receive.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests from queriers, in quantiles.') + - g.grpcLatencyPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.latencyPanel('grpc_server_handling_seconds', grpcServerStreamSelector, thanos.receive.dashboard.dimensions) ) ) .addRow( @@ -109,7 +122,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Successful Upload', 'Shows the relative time of last successful upload to the object-store bucket.') + g.tablePanel( - ['time() - max(thanos_objstore_bucket_last_successful_upload_time{namespace="$namespace",job=~"$job"}) by (job, bucket)'], + ['time() - max by (%s) (thanos_objstore_bucket_last_successful_upload_time{%s})' % [utils.joinLabels([thanos.receive.dashboard.dimensions, 'bucket']), thanos.receive.dashboard.selector]], { Value: { alias: 'Uploaded Ago', @@ -121,28 +134,27 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; ) ) .addRow( - g.resourceUtilizationRow() - ) + - g.template('namespace', thanos.dashboard.namespaceQuery) + - g.template('job', 'up', 'namespace="$namespace", %(selector)s' % thanos.receive, true, '%(jobPrefix)s.*' % thanos.receive), + g.resourceUtilizationRow(thanos.receive.dashboard.selector, thanos.receive.dashboard.dimensions) + ), __overviewRows__+:: [ g.row('Receive') .addPanel( g.panel('Incoming Requests Rate', 'Shows rate of incoming requests.') + - g.httpQpsPanel('http_requests_total', 'handler="receive",namespace="$namespace",%(selector)s' % thanos.receive) + + g.httpQpsPanel('http_requests_total', utils.joinLabels([thanos.dashboard.overview.selector, 'handler="receive"']), thanos.dashboard.overview.dimensions) + g.addDashboardLink(thanos.receive.title) ) .addPanel( g.panel('Incoming Requests Errors', 'Shows ratio of errors compared to the total number of handled 
incoming requests.') + - g.httpErrPanel('http_requests_total', 'handler="receive",namespace="$namespace",%(selector)s' % thanos.receive) + + g.httpErrPanel('http_requests_total', utils.joinLabels([thanos.dashboard.overview.selector, 'handler="receive"']), thanos.dashboard.overview.dimensions) + g.addDashboardLink(thanos.receive.title) ) .addPanel( g.sloLatency( 'Incoming Requests Latency 99th Percentile', 'Shows how long has it taken to handle incoming requests.', - 'http_request_duration_seconds_bucket{handler="receive",namespace="$namespace",%(selector)s}' % thanos.receive, + 'http_request_duration_seconds_bucket{%s}' % utils.joinLabels([thanos.dashboard.overview.selector, 'handler="receive"']), + thanos.dashboard.overview.dimensions, 0.99, 0.5, 1 diff --git a/mixin/dashboards/rule.libsonnet b/mixin/dashboards/rule.libsonnet index 9b5480cf327..a1dd966370f 100644 --- a/mixin/dashboards/rule.libsonnet +++ b/mixin/dashboards/rule.libsonnet @@ -1,32 +1,35 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; +local utils = import '../lib/utils.libsonnet'; { local thanos = self, rule+:: { - jobPrefix: error 'must provide job prefix for Thanos Rule dashboard', selector: error 'must provide selector for Thanos Rule dashboard', title: error 'must provide title for Thanos Rule dashboard', + dashboard:: { + selector: std.join(', ', thanos.dashboard.selector + ['job="$job"']), + dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']), + }, }, grafanaDashboards+:: { [if thanos.rule != null then 'rule.json']: + local grpcUnarySelector = utils.joinLabels([thanos.rule.dashboard.selector, 'grpc_type="unary"']); + local grpcServerStreamSelector = utils.joinLabels([thanos.rule.dashboard.selector, 'grpc_type="server_stream"']); + g.dashboard(thanos.rule.title) .addRow( g.row('Rule Group Evaluations') .addPanel( g.panel('Rule Group Evaluations') + g.queryPanel( - ||| - sum by (strategy) 
(rate(prometheus_rule_evaluations_total{namespace="$namespace",job="$job"}[$interval])) - |||, + 'sum by (%s) (rate(prometheus_rule_evaluations_total{%s}[$interval]))' % [utils.joinLabels([thanos.rule.dashboard.dimensions, 'strategy']), thanos.rule.dashboard.selector], '{{ strategy }}', ) ) .addPanel( g.panel('Rule Group Evaluations Missed') + g.queryPanel( - ||| - sum by (strategy) (increase(prometheus_rule_group_iterations_missed_total{namespace="$namespace",job="$job"}[$interval])) - |||, + 'sum by (%s) (increase(prometheus_rule_group_iterations_missed_total{%s}[$interval]))' % [utils.joinLabels([thanos.rule.dashboard.dimensions, 'strategy']), thanos.rule.dashboard.selector], '{{ strategy }}', ) ) @@ -35,11 +38,11 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; g.queryPanel( ||| ( - max by(rule_group) (prometheus_rule_group_last_duration_seconds{namespace="$namespace",job="$job"}) + max by(%(dimensions)s, rule_group) (prometheus_rule_group_last_duration_seconds{%(selector)s}) > - sum by(rule_group) (prometheus_rule_group_interval_seconds{namespace="$namespace",job="$job"}) + sum by(%(dimensions)s, rule_group) (prometheus_rule_group_interval_seconds{%(selector)s}) ) - |||, + ||| % thanos.rule.dashboard, '{{ rule_group }}', ) ) @@ -49,14 +52,14 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Dropped Rate', 'Shows rate of dropped alerts.') + g.queryPanel( - 'sum(rate(thanos_alert_sender_alerts_dropped_total{namespace="$namespace",job=~"$job"}[$interval])) by (job, alertmanager)', + 'sum by (%s, alertmanager) (rate(thanos_alert_sender_alerts_dropped_total{%s}[$interval]))' % [thanos.rule.dashboard.dimensions, thanos.rule.dashboard.selector], '{{alertmanager}}' ) ) .addPanel( g.panel('Sent Rate', 'Shows rate of alerts that successfully sent to alert manager.') + g.queryPanel( - 'sum(rate(thanos_alert_sender_alerts_sent_total{namespace="$namespace",job=~"$job"}[$interval])) by (job, 
alertmanager)', + 'sum by (%s, alertmanager) (rate(thanos_alert_sender_alerts_sent_total{%s}[$interval]))' % [thanos.rule.dashboard.dimensions, thanos.rule.dashboard.selector], '{{alertmanager}}' ) + g.stack @@ -64,13 +67,14 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Sent Errors', 'Shows ratio of errors compared to the total number of sent alerts.') + g.qpsErrTotalPanel( - 'thanos_alert_sender_errors_total{namespace="$namespace",job=~"$job"}', - 'thanos_alert_sender_alerts_sent_total{namespace="$namespace",job=~"$job"}', + 'thanos_alert_sender_errors_total{%s}' % thanos.rule.dashboard.selector, + 'thanos_alert_sender_alerts_sent_total{%s}' % thanos.rule.dashboard.selector, + thanos.rule.dashboard.dimensions ) ) .addPanel( g.panel('Sent Duration', 'Shows how long has it taken to send alerts to alert manager.') + - g.latencyPanel('thanos_alert_sender_latency_seconds', 'namespace="$namespace",job=~"$job"'), + g.latencyPanel('thanos_alert_sender_latency_seconds', thanos.rule.dashboard.selector, thanos.rule.dashboard.dimensions), ) ) .addRow( @@ -78,15 +82,16 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Push Rate', 'Shows rate of queued alerts.') + g.queryPanel( - 'sum(rate(thanos_alert_queue_alerts_dropped_total{namespace="$namespace",job=~"$job"}[$interval])) by (job)', + 'sum by (%s) (rate(thanos_alert_queue_alerts_dropped_total{%s}[$interval]))' % [thanos.rule.dashboard.dimensions, thanos.rule.dashboard.selector], '{{job}}' ) ) .addPanel( g.panel('Drop Ratio', 'Shows ratio of dropped alerts compared to the total number of queued alerts.') + g.qpsErrTotalPanel( - 'thanos_alert_queue_alerts_dropped_total{namespace="$namespace",job=~"$job"}', - 'thanos_alert_queue_alerts_pushed_total{namespace="$namespace",job=~"$job"}', + 'thanos_alert_queue_alerts_dropped_total{%s}' % thanos.rule.dashboard.selector, + 'thanos_alert_queue_alerts_pushed_total{%s}' % 
thanos.rule.dashboard.selector, + thanos.rule.dashboard.dimensions ) ) ) @@ -94,44 +99,42 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; g.row('gRPC (Unary)') .addPanel( g.panel('Rate', 'Shows rate of handled Unary gRPC requests.') + - g.grpcQpsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary"') + g.grpcRequestsPanel('grpc_server_handled_total', grpcUnarySelector, thanos.rule.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests.') + - g.grpcErrorsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary"') + g.grpcErrorsPanel('grpc_server_handled_total', grpcUnarySelector, thanos.rule.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests, in quantiles.') + - g.grpcLatencyPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary"') + g.latencyPanel('grpc_server_handling_seconds', grpcUnarySelector, thanos.rule.dashboard.dimensions) ) ) .addRow( g.row('gRPC (Stream)') .addPanel( g.panel('Rate', 'Shows rate of handled Streamed gRPC requests.') + - g.grpcQpsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.grpcRequestsPanel('grpc_server_handled_total', grpcServerStreamSelector, thanos.rule.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests.') + - g.grpcErrorsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.grpcErrorsPanel('grpc_server_handled_total', grpcServerStreamSelector, thanos.rule.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests, in quantiles') + - g.grpcLatencyPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.latencyPanel('grpc_server_handling_seconds', grpcServerStreamSelector, thanos.rule.dashboard.dimensions) ) ) .addRow( - 
g.resourceUtilizationRow() - ) + - g.template('namespace', thanos.dashboard.namespaceQuery) + - g.template('job', 'up', 'namespace="$namespace", %(selector)s' % thanos.rule, true, '%(jobPrefix)s.*' % thanos.rule), + g.resourceUtilizationRow(thanos.rule.dashboard.selector, thanos.rule.dashboard.dimensions) + ), __overviewRows__+:: [ g.row('Rule') .addPanel( g.panel('Alert Sent Rate', 'Shows rate of alerts that successfully sent to alert manager.') + g.queryPanel( - 'sum(rate(thanos_alert_sender_alerts_sent_total{namespace="$namespace",%(selector)s}[$interval])) by (job, alertmanager)' % thanos.rule, + 'sum by (%s) (rate(thanos_alert_sender_alerts_sent_total{%s}[$interval]))' % [utils.joinLabels([thanos.dashboard.overview.dimensions, 'alertmanager']), thanos.dashboard.overview.selector], '{{alertmanager}}' ) + g.addDashboardLink(thanos.rule.title) + @@ -140,8 +143,9 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Alert Sent Errors', 'Shows ratio of errors compared to the total number of sent alerts.') + g.qpsErrTotalPanel( - 'thanos_alert_sender_errors_total{namespace="$namespace",%(selector)s}' % thanos.rule, - 'thanos_alert_sender_alerts_sent_total{namespace="$namespace",%(selector)s}' % thanos.rule, + 'thanos_alert_sender_errors_total{%s}' % thanos.dashboard.overview.selector, + 'thanos_alert_sender_alerts_sent_total{%s}' % thanos.dashboard.overview.selector, + thanos.dashboard.overview.dimensions ) + g.addDashboardLink(thanos.rule.title) ) @@ -149,7 +153,8 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; g.sloLatency( 'Alert Sent Duration', 'Shows how long has it taken to send alerts to alert manager.', - 'thanos_alert_sender_latency_seconds_bucket{namespace="$namespace",%(selector)s}' % thanos.rule, + 'thanos_alert_sender_latency_seconds_bucket{%s}' % thanos.dashboard.overview.selector, + thanos.dashboard.overview.dimensions, 0.99, 0.5, 1 diff --git a/mixin/dashboards/sidecar.libsonnet 
b/mixin/dashboards/sidecar.libsonnet index 84c07ebfa0b..de726945b37 100644 --- a/mixin/dashboards/sidecar.libsonnet +++ b/mixin/dashboards/sidecar.libsonnet @@ -1,43 +1,50 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; +local utils = import '../lib/utils.libsonnet'; { local thanos = self, sidecar+:: { - jobPrefix: error 'must provide job prefix for Thanos Sidecar dashboard', selector: error 'must provide selector for Thanos Sidecar dashboard', title: error 'must provide title for Thanos Sidecar dashboard', + dashboard:: { + selector: std.join(', ', thanos.dashboard.selector + ['job="$job"']), + dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']), + }, }, grafanaDashboards+:: { [if thanos.sidecar != null then 'sidecar.json']: + local grpcUnarySelector = utils.joinLabels([thanos.sidecar.dashboard.selector, 'grpc_type="unary"']); + local grpcServerSelector = utils.joinLabels([thanos.sidecar.dashboard.selector, 'grpc_type="server_stream"']); + g.dashboard(thanos.sidecar.title) .addRow( g.row('gRPC (Unary)') .addPanel( g.panel('Rate', 'Shows rate of handled Unary gRPC requests from queriers.') + - g.grpcQpsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary"') + g.grpcRequestsPanel('grpc_server_handled_total', grpcUnarySelector, thanos.sidecar.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests from queriers.') + - g.grpcErrorsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary"') + g.grpcErrorsPanel('grpc_server_handled_total', grpcUnarySelector, thanos.sidecar.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests from queriers, in quantiles.') + - g.grpcLatencyPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary"') + g.latencyPanel('grpc_server_handling_seconds', grpcUnarySelector, thanos.sidecar.dashboard.dimensions) ) ) .addRow( g.row('gRPC (Stream)') 
.addPanel( g.panel('Rate', 'Shows rate of handled Streamed gRPC requests from queriers.') + - g.grpcQpsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.grpcRequestsPanel('grpc_server_handled_total', grpcServerSelector, thanos.sidecar.dashboard.dimensions) ) .addPanel( g.panel('Errors') + - g.grpcErrorsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.grpcErrorsPanel('grpc_server_handled_total', grpcServerSelector, thanos.sidecar.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests from queriers, in quantiles.') + - g.grpcLatencyPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.latencyPanel('grpc_server_handling_seconds', grpcServerSelector, thanos.sidecar.dashboard.dimensions) ) ) .addRow( @@ -45,7 +52,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Successful Upload', 'Shows the relative time of last successful upload to the object-store bucket.') + g.tablePanel( - ['time() - max(thanos_objstore_bucket_last_successful_upload_time{namespace="$namespace",job=~"$job"}) by (job, bucket)'], + ['time() - max by (%s) (thanos_objstore_bucket_last_successful_upload_time{%s})' % [utils.joinLabels([thanos.sidecar.dashboard.dimensions, 'bucket']), thanos.sidecar.dashboard.selector]], { Value: { alias: 'Uploaded Ago', @@ -61,7 +68,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Rate') + g.queryPanel( - 'sum(rate(thanos_objstore_bucket_operations_total{namespace="$namespace",job=~"$job"}[$interval])) by (job, operation)', + 'sum by (%s) (rate(thanos_objstore_bucket_operations_total{%s}[$interval]))' % [utils.joinLabels([thanos.sidecar.dashboard.dimensions, 'operation']), thanos.sidecar.dashboard.selector], '{{job}} {{operation}}' ) + g.stack @@ -69,38 +76,38 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( 
g.panel('Errors') + g.qpsErrTotalPanel( - 'thanos_objstore_bucket_operation_failures_total{namespace="$namespace",job=~"$job"}', - 'thanos_objstore_bucket_operations_total{namespace="$namespace",job=~"$job"}', + 'thanos_objstore_bucket_operation_failures_total{%s}' % thanos.sidecar.dashboard.selector, + 'thanos_objstore_bucket_operations_total{%s}' % thanos.sidecar.dashboard.selector, + thanos.sidecar.dashboard.dimensions ) ) .addPanel( g.panel('Duration') + - g.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', 'namespace="$namespace",job=~"$job"') + g.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', thanos.sidecar.dashboard.selector, thanos.sidecar.dashboard.dimensions) ) ) .addRow( - g.resourceUtilizationRow() - ) + - g.template('namespace', thanos.dashboard.namespaceQuery) + - g.template('job', 'up', 'namespace="$namespace", %(selector)s' % thanos.sidecar, true, '%(jobPrefix)s.*' % thanos.sidecar), + g.resourceUtilizationRow(thanos.sidecar.dashboard.selector, thanos.sidecar.dashboard.dimensions) + ), __overviewRows__+:: [ g.row('Sidecar') .addPanel( g.panel('gPRC (Unary) Rate', 'Shows rate of handled Unary gRPC requests from queriers.') + - g.grpcQpsPanel('server', 'namespace="$namespace",%(selector)s,grpc_type="unary"' % thanos.sidecar) + + g.grpcRequestsPanel('grpc_server_handled_total', utils.joinLabels([thanos.dashboard.overview.selector, 'grpc_type="unary"']), thanos.dashboard.overview.dimensions) + g.addDashboardLink(thanos.sidecar.title) ) .addPanel( g.panel('gPRC (Unary) Errors', 'Shows ratio of errors compared to the total number of handled requests from queriers.') + - g.grpcErrorsPanel('server', 'namespace="$namespace",%(selector)s,grpc_type="unary"' % thanos.sidecar) + + g.grpcErrorsPanel('grpc_server_handled_total', utils.joinLabels([thanos.dashboard.overview.selector, 'grpc_type="unary"']), thanos.dashboard.overview.dimensions) + g.addDashboardLink(thanos.sidecar.title) ) .addPanel( g.sloLatency( 'gPRC (Unary) 
Latency 99th Percentile', 'Shows how long has it taken to handle requests from queriers, in quantiles.', - 'grpc_server_handling_seconds_bucket{grpc_type="unary",namespace="$namespace",%(selector)s}' % thanos.sidecar, + 'grpc_server_handling_seconds_bucket{%s}' % utils.joinLabels([thanos.dashboard.overview.selector, 'grpc_type="unary"']), + thanos.dashboard.overview.dimensions, 0.99, 0.5, 1 diff --git a/mixin/dashboards/store.libsonnet b/mixin/dashboards/store.libsonnet index c94ae0f2db2..93b2a860b89 100644 --- a/mixin/dashboards/store.libsonnet +++ b/mixin/dashboards/store.libsonnet @@ -1,43 +1,50 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; +local utils = import '../lib/utils.libsonnet'; { local thanos = self, store+:: { - jobPrefix: error 'must provide job prefix for Thanos Store dashboard', selector: error 'must provide selector for Thanos Store dashboard', title: error 'must provide title for Thanos Store dashboard', + dashboard:: { + selector: std.join(', ', thanos.dashboard.selector + ['job="$job"']), + dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']), + }, }, grafanaDashboards+:: { [if thanos.store != null then 'store.json']: + local grpcUnarySelector = utils.joinLabels([thanos.store.dashboard.selector, 'grpc_type="unary"']); + local grpcServerStreamSelector = utils.joinLabels([thanos.store.dashboard.selector, 'grpc_type="server_stream"']); + g.dashboard(thanos.store.title) .addRow( g.row('gRPC (Unary)') .addPanel( g.panel('Rate', 'Shows rate of handled Unary gRPC requests from queriers.') + - g.grpcQpsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary"') + g.grpcRequestsPanel('grpc_server_handled_total', grpcUnarySelector, thanos.store.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests from queriers.') + - g.grpcErrorsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary"') + 
g.grpcErrorsPanel('grpc_server_handled_total', grpcUnarySelector, thanos.store.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests from queriers, in quantiles.') + - g.grpcLatencyPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="unary"') + g.latencyPanel('grpc_server_handling_seconds', grpcUnarySelector, thanos.store.dashboard.dimensions) ) ) .addRow( g.row('gRPC (Stream)') .addPanel( g.panel('Rate', 'Shows rate of handled Streamed gRPC requests from queriers.') + - g.grpcQpsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.grpcRequestsPanel('grpc_server_handled_total', grpcServerStreamSelector, thanos.store.dashboard.dimensions) ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests from queriers.') + - g.grpcErrorsPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.grpcErrorsPanel('grpc_server_handled_total', grpcServerStreamSelector, thanos.store.dashboard.dimensions) ) .addPanel( g.panel('Duration', 'Shows how long has it taken to handle requests from queriers, in quantiles.') + - g.grpcLatencyPanel('server', 'namespace="$namespace",job=~"$job",grpc_type="server_stream"') + g.latencyPanel('grpc_server_handling_seconds', grpcServerStreamSelector, thanos.store.dashboard.dimensions) ) ) .addRow( @@ -45,7 +52,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Rate', 'Shows rate of execution for operations against the bucket.') + g.queryPanel( - 'sum(rate(thanos_objstore_bucket_operations_total{namespace="$namespace",job=~"$job"}[$interval])) by (job, operation)', + 'sum by (%s) (rate(thanos_objstore_bucket_operations_total{%s}[$interval]))' % [utils.joinLabels([thanos.store.dashboard.dimensions, 'operation']), thanos.store.dashboard.selector], '{{job}} {{operation}}' ) + g.stack @@ -53,7 +60,7 @@ local g = import 
'../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of executed operations against the bucket.') + g.queryPanel( - 'sum by (job, operation) (rate(thanos_objstore_bucket_operation_failures_total{namespace="$namespace",job=~"$job"}[$interval])) / sum by (job, operation) (rate(thanos_objstore_bucket_operations_total{namespace="$namespace",job=~"$job"}[$interval]))', + 'sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operation_failures_total{%(selector)s}[$interval])) / sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operations_total{%(selector)s}[$interval]))' % thanos.store.dashboard { dimensions: utils.joinLabels([thanos.store.dashboard.dimensions, 'operation']) }, '{{job}} {{operation}}' ) + { yaxes: g.yaxes({ format: 'percentunit' }) } + @@ -61,7 +68,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; ) .addPanel( g.panel('Duration', 'Shows how long has it taken to execute operations against the bucket, in quantiles.') + - $.latencyByOperationPanel('thanos_objstore_bucket_operation_duration_seconds', 'namespace="$namespace",job=~"$job"') + $.latencyByOperationPanel('thanos_objstore_bucket_operation_duration_seconds', thanos.store.dashboard.selector, thanos.store.dashboard.dimensions) ) ) .addRow( @@ -69,7 +76,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Block Load Rate', 'Shows rate of block loads from the bucket.') + g.queryPanel( - 'sum(rate(thanos_bucket_store_block_loads_total{namespace="$namespace",job=~"$job"}[$interval]))', + 'sum by (%s) (rate(thanos_bucket_store_block_loads_total{%s}[$interval]))' % [thanos.store.dashboard.dimensions, thanos.store.dashboard.selector], 'block loads' ) + g.stack @@ -77,14 +84,15 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Block Load Errors', 'Shows ratio of errors compared to the total number of block loads from the 
bucket.') + g.qpsErrTotalPanel( - 'thanos_bucket_store_block_load_failures_total{namespace="$namespace",job=~"$job"}', - 'thanos_bucket_store_block_loads_total{namespace="$namespace",job=~"$job"}', + 'thanos_bucket_store_block_load_failures_total{%s}' % thanos.store.dashboard.selector, + 'thanos_bucket_store_block_loads_total{%s}' % thanos.store.dashboard.selector, + thanos.store.dashboard.dimensions ) ) .addPanel( g.panel('Block Drop Rate', 'Shows rate of block drops.') + g.queryPanel( - 'sum(rate(thanos_bucket_store_block_drops_total{namespace="$namespace",job=~"$job"}[$interval])) by (job, operation)', + 'sum by (%s) (rate(thanos_bucket_store_block_drops_total{%s}[$interval]))' % [utils.joinLabels([thanos.store.dashboard.dimensions, 'operation']), thanos.store.dashboard.selector], 'block drops {{job}}' ) + g.stack @@ -92,8 +100,9 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Block Drop Errors', 'Shows ratio of errors compared to the total number of block drops.') + g.qpsErrTotalPanel( - 'thanos_bucket_store_block_drop_failures_total{namespace="$namespace",job=~"$job"}', - 'thanos_bucket_store_block_drops_total{namespace="$namespace",job=~"$job"}', + 'thanos_bucket_store_block_drop_failures_total{%s}' % thanos.store.dashboard.selector, + 'thanos_bucket_store_block_drops_total{%s}' % thanos.store.dashboard.selector, + thanos.store.dashboard.dimensions ) ) ) @@ -102,7 +111,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Requests', 'Show rate of cache requests.') + g.queryPanel( - 'sum(rate(thanos_store_index_cache_requests_total{namespace="$namespace",job=~"$job"}[$interval])) by (job, item_type)', + 'sum by (%s) (rate(thanos_store_index_cache_requests_total{%s}[$interval]))' % [utils.joinLabels([thanos.store.dashboard.dimensions, 'item_type']), thanos.store.dashboard.selector], '{{job}} {{item_type}}', ) + g.stack @@ -110,7 +119,7 @@ local g = import 
'../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Hits', 'Shows ratio of errors compared to the total number of cache hits.') + g.queryPanel( - 'sum(rate(thanos_store_index_cache_hits_total{namespace="$namespace",job=~"$job"}[$interval])) by (job, item_type)', + 'sum by (%s) (rate(thanos_store_index_cache_hits_total{%s}[$interval]))' % [utils.joinLabels([thanos.store.dashboard.dimensions, 'item_type']), thanos.store.dashboard.selector], '{{job}} {{item_type}}', ) + g.stack @@ -118,7 +127,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Added', 'Show rate of added items to cache.') + g.queryPanel( - 'sum(rate(thanos_store_index_cache_items_added_total{namespace="$namespace",job=~"$job"}[$interval])) by (job, item_type)', + 'sum by (%s) (rate(thanos_store_index_cache_items_added_total{%s}[$interval]))' % [utils.joinLabels([thanos.store.dashboard.dimensions, 'item_type']), thanos.store.dashboard.selector], '{{job}} {{item_type}}', ) + g.stack @@ -126,7 +135,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; .addPanel( g.panel('Evicted', 'Show rate of evicted items from cache.') + g.queryPanel( - 'sum(rate(thanos_store_index_cache_items_evicted_total{namespace="$namespace",job=~"$job"}[$interval])) by (job, item_type)', + 'sum by (%s) (rate(thanos_store_index_cache_items_evicted_total{%s}[$interval]))' % [utils.joinLabels([thanos.store.dashboard.dimensions, 'item_type']), thanos.store.dashboard.selector], '{{job}} {{item_type}}', ) + g.stack @@ -138,9 +147,9 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; g.panel('Chunk Size', 'Shows size of chunks that have sent to the bucket.') + g.queryPanel( [ - 'histogram_quantile(0.99, sum(rate(thanos_bucket_store_sent_chunk_size_bytes_bucket{namespace="$namespace",job=~"$job"}[$interval])) by (job, le))', - 'sum(rate(thanos_bucket_store_sent_chunk_size_bytes_sum{namespace="$namespace",job=~"$job"}[$interval])) by (job) / 
sum(rate(thanos_bucket_store_sent_chunk_size_bytes_count{namespace="$namespace",job=~"$job"}[$interval])) by (job)', - 'histogram_quantile(0.99, sum(rate(thanos_bucket_store_sent_chunk_size_bytes_bucket{namespace="$namespace",job=~"$job"}[$interval])) by (job, le))', + 'histogram_quantile(0.99, sum by (%s) (rate(thanos_bucket_store_sent_chunk_size_bytes_bucket{%s}[$interval])))' % [utils.joinLabels([thanos.store.dashboard.dimensions, 'le']), thanos.store.dashboard.selector], + 'sum by (%(dimensions)s) (rate(thanos_bucket_store_sent_chunk_size_bytes_sum{%(selector)s}[$interval])) / sum by (%(dimensions)s) (rate(thanos_bucket_store_sent_chunk_size_bytes_count{%(selector)s}[$interval]))' % thanos.store.dashboard, + 'histogram_quantile(0.99, sum by (%s) (rate(thanos_bucket_store_sent_chunk_size_bytes_bucket{%s}[$interval])))' % [utils.joinLabels([thanos.store.dashboard.dimensions, 'le']), thanos.store.dashboard.selector], ], [ 'P99', @@ -157,9 +166,9 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; g.panel('Block queried') + g.queryPanel( [ - 'thanos_bucket_store_series_blocks_queried{namespace="$namespace",job=~"$job",quantile="0.99"}', - 'sum(rate(thanos_bucket_store_series_blocks_queried_sum{namespace="$namespace",job=~"$job"}[$interval])) by (job) / sum(rate(thanos_bucket_store_series_blocks_queried_count{namespace="$namespace",job=~"$job"}[$interval])) by (job)', - 'thanos_bucket_store_series_blocks_queried{namespace="$namespace",job=~"$job",quantile="0.50"}', + 'thanos_bucket_store_series_blocks_queried{%s, quantile="0.99"}' % thanos.store.dashboard.selector, + 'sum by (%(dimensions)s) (rate(thanos_bucket_store_series_blocks_queried_sum{%(selector)s}[$interval])) / sum by (%(dimensions)s) (rate(thanos_bucket_store_series_blocks_queried_count{%(selector)s}[$interval]))' % thanos.store.dashboard, + 'thanos_bucket_store_series_blocks_queried{%s, quantile="0.50"}' % thanos.store.dashboard.selector, ], [ 'P99', 'mean {{job}}', @@ -171,9 +180,9 @@ 
local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; g.panel('Data Fetched', 'Show the size of data fetched') + g.queryPanel( [ - 'thanos_bucket_store_series_data_fetched{namespace="$namespace",job=~"$job",quantile="0.99"}', - 'sum(rate(thanos_bucket_store_series_data_fetched_sum{namespace="$namespace",job=~"$job"}[$interval])) by (job) / sum(rate(thanos_bucket_store_series_data_fetched_count{namespace="$namespace",job=~"$job"}[$interval])) by (job)', - 'thanos_bucket_store_series_data_fetched{namespace="$namespace",job=~"$job",quantile="0.50"}', + 'thanos_bucket_store_series_data_fetched{%s, quantile="0.99"}' % thanos.store.dashboard.selector, + 'sum by (%(dimensions)s) (rate(thanos_bucket_store_series_data_fetched_sum{%(selector)s}[$interval])) / sum by (%(dimensions)s) (rate(thanos_bucket_store_series_data_fetched_count{%(selector)s}[$interval]))' % thanos.store.dashboard, + 'thanos_bucket_store_series_data_fetched{%s, quantile="0.50"}' % thanos.store.dashboard.selector, ], [ 'P99', 'mean {{job}}', @@ -186,9 +195,9 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; g.panel('Result series') + g.queryPanel( [ - 'thanos_bucket_store_series_result_series{namespace="$namespace",job=~"$job",quantile="0.99"}', - 'sum(rate(thanos_bucket_store_series_result_series_sum{namespace="$namespace",job=~"$job"}[$interval])) by (job) / sum(rate(thanos_bucket_store_series_result_series_count{namespace="$namespace",job=~"$job"}[$interval])) by (job)', - 'thanos_bucket_store_series_result_series{namespace="$namespace",job=~"$job",quantile="0.50"}', + 'thanos_bucket_store_series_result_series{%s,quantile="0.99"}' % thanos.store.dashboard.selector, + 'sum by (%(dimensions)s) (rate(thanos_bucket_store_series_result_series_sum{%(selector)s}[$interval])) / sum by (%(dimensions)s) (rate(thanos_bucket_store_series_result_series_count{%(selector)s}[$interval]))' % thanos.store.dashboard, + 'thanos_bucket_store_series_result_series{%s,quantile="0.50"}' % 
thanos.store.dashboard.selector, ], [ 'P99', 'mean {{job}}', @@ -201,40 +210,39 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; g.row('Series Operation Durations') .addPanel( g.panel('Get All', 'Shows how long has it taken to get all series.') + - g.latencyPanel('thanos_bucket_store_series_get_all_duration_seconds', 'namespace="$namespace",job=~"$job"') + g.latencyPanel('thanos_bucket_store_series_get_all_duration_seconds', thanos.store.dashboard.selector, thanos.store.dashboard.dimensions) ) .addPanel( g.panel('Merge', 'Shows how long has it taken to merge series.') + - g.latencyPanel('thanos_bucket_store_series_merge_duration_seconds', 'namespace="$namespace",job=~"$job"') + g.latencyPanel('thanos_bucket_store_series_merge_duration_seconds', thanos.store.dashboard.selector, thanos.store.dashboard.dimensions) ) .addPanel( g.panel('Gate', 'Shows how long has it taken for a series to wait at the gate.') + - g.latencyPanel('thanos_bucket_store_series_gate_duration_seconds', 'namespace="$namespace",job=~"$job"') + g.latencyPanel('thanos_bucket_store_series_gate_duration_seconds', thanos.store.dashboard.selector, thanos.store.dashboard.dimensions) ) ) .addRow( - g.resourceUtilizationRow() - ) + - g.template('namespace', thanos.dashboard.namespaceQuery) + - g.template('job', 'up', 'namespace="$namespace", %(selector)s' % thanos.store, true, '%(jobPrefix)s.*' % thanos.store), + g.resourceUtilizationRow(thanos.store.dashboard.selector, thanos.store.dashboard.dimensions) + ), __overviewRows__+:: [ g.row('Store') .addPanel( g.panel('gPRC (Unary) Rate', 'Shows rate of handled Unary gRPC requests from queriers.') + - g.grpcQpsPanel('server', 'namespace="$namespace",%(selector)s,grpc_type="unary"' % thanos.store) + + g.grpcRequestsPanel('grpc_server_handled_total', utils.joinLabels([thanos.dashboard.overview.selector, 'grpc_type="unary"']), thanos.dashboard.overview.dimensions) + g.addDashboardLink(thanos.store.title) ) .addPanel( g.panel('gPRC (Unary) 
Errors', 'Shows ratio of errors compared to the total number of handled requests from queriers.') + - g.grpcErrorsPanel('server', 'namespace="$namespace",%(selector)s,grpc_type="unary"' % thanos.store) + + g.grpcErrorsPanel('grpc_server_handled_total', utils.joinLabels([thanos.dashboard.overview.selector, 'grpc_type="unary"']), thanos.dashboard.overview.dimensions) + g.addDashboardLink(thanos.store.title) ) .addPanel( g.sloLatency( 'gRPC Latency 99th Percentile', 'Shows how long has it taken to handle requests from queriers.', - 'grpc_server_handling_seconds_bucket{grpc_type="unary",namespace="$namespace",%(selector)s}' % thanos.store, + 'grpc_server_handling_seconds_bucket{%s}' % utils.joinLabels([thanos.dashboard.overview.selector, 'grpc_type="unary"']), + thanos.dashboard.overview.dimensions, 0.99, 0.5, 1 @@ -244,11 +252,13 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; ], }, - latencyByOperationPanel(metricName, selector, multiplier='1'):: { + latencyByOperationPanel(metricName, selector, dimensions, multiplier='1'):: { + local params = { metricName: metricName, selector: selector, multiplier: multiplier, dimensions: dimensions }, + nullPointMode: 'null as zero', targets: [ { - expr: 'histogram_quantile(0.99, sum(rate(%s_bucket{%s}[$interval])) by (job, operation, le)) * %s' % [metricName, selector, multiplier], + expr: 'histogram_quantile(0.99, sum by (%(dimensions)s, operation, le) (rate(%(metricName)s_bucket{%(selector)s}[$interval]))) * %(multiplier)s' % params, format: 'time_series', intervalFactor: 2, legendFormat: 'P99 {{job}}', @@ -256,7 +266,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; step: 10, }, { - expr: 'sum(rate(%s_sum{%s}[$interval])) by (job, operation) * %s / sum(rate(%s_count{%s}[$interval])) by (job, operation)' % [metricName, selector, multiplier, metricName, selector], + expr: 'sum by (%(dimensions)s, operation) (rate(%(metricName)s_sum{%(selector)s}[$interval])) * %(multiplier)s / sum by 
(%(dimensions)s, operation) (rate(%(metricName)s_count{%(selector)s}[$interval]))' % params, format: 'time_series', intervalFactor: 2, legendFormat: 'mean {{job}}', @@ -264,7 +274,7 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; step: 10, }, { - expr: 'histogram_quantile(0.50, sum(rate(%s_bucket{%s}[$interval])) by (job, operation, le)) * %s' % [metricName, selector, multiplier], + expr: 'histogram_quantile(0.50, sum by (%(dimensions)s, operation, le) (rate(%(metricName)s_bucket{%(selector)s}[$interval]))) * %(multiplier)s' % params, format: 'time_series', intervalFactor: 2, legendFormat: 'P50 {{job}}', diff --git a/mixin/jsonnetfile.lock.json b/mixin/jsonnetfile.lock.json index 7e871c80334..de1b098e197 100644 --- a/mixin/jsonnetfile.lock.json +++ b/mixin/jsonnetfile.lock.json @@ -8,8 +8,8 @@ "subdir": "grafonnet" } }, - "version": "8fb95bd89990e493a8534205ee636bfcb8db67bd", - "sum": "tDuuSKE9f4Ew2bjBM33Rs6behLEAzkmKkShSt+jpAak=" + "version": "daad85cf3fad3580e58029414630e29956aefe21", + "sum": "zkOBVXtNSGlOdbm5TRCbEik7c/Jk+btbJqaE9qW8j3Y=" }, { "source": { @@ -18,8 +18,8 @@ "subdir": "grafana-builder" } }, - "version": "f4c59f64f80442f871a06c91edf74d014b82acaf", - "sum": "ELsYwK+kGdzX1mee2Yy+/b2mdO4Y503BOCDkFzwmGbE=" + "version": "4d4b5b1ce01003547a110f93cc86b8b7afb282a6", + "sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8=" } ], "legacyImports": false diff --git a/mixin/lib/thanos-grafana-builder/builder.libsonnet b/mixin/lib/thanos-grafana-builder/builder.libsonnet index 4039ecf7d4d..5b879a23f33 100644 --- a/mixin/lib/thanos-grafana-builder/builder.libsonnet +++ b/mixin/lib/thanos-grafana-builder/builder.libsonnet @@ -1,5 +1,6 @@ local grafana = import 'grafonnet/grafana.libsonnet'; local template = grafana.template; +local utils = import '../utils.libsonnet'; (import 'grafana-builder/grafana.libsonnet') + { @@ -22,37 +23,6 @@ local template = grafana.template; ], }, - template(name, metricName, selector='', includeAll=false, 
allValues=''):: - local t = if includeAll then - template.new( - name, - '$datasource', - 'label_values(%s{%s}, %s)' % [metricName, selector, name], - label=name, - refresh=1, - sort=2, - current='all', - allValues=allValues, - includeAll=true - ) - else - template.new( - name, - '$datasource', - 'label_values(%s{%s}, %s)' % [metricName, selector, name], - label=name, - refresh=1, - sort=2, - ); - - { - templating+: { - list+: [ - t, - ], - }, - }, - spanSize(size):: { span: size, }, @@ -69,39 +39,50 @@ local template = grafana.template; }, }, - latencyPanel(metricName, selector, multiplier='1'):: { + latencyPanel(metricName, selector, dimensions, multiplier='1'):: { + local aggregatedLabels = std.split(dimensions, ','), + local dimensionsTemplate = std.join(' ', ['{{%s}}' % std.stripChars(label, ' ') for label in aggregatedLabels]), + nullPointMode: 'null as zero', targets: [ { - expr: 'histogram_quantile(0.99, sum(rate(%s_bucket{%s}[$interval])) by (job, le)) * %s' % [metricName, selector, multiplier], + expr: 'histogram_quantile(%.2f, sum by (%s) (rate(%s_bucket{%s}[$interval]))) * %s' % [percentile, utils.joinLabels([dimensions, 'le']), metricName, selector, multiplier], format: 'time_series', intervalFactor: 2, - legendFormat: 'P99 {{job}}', + legendFormat: 'p%d %s' % [100 * percentile, dimensionsTemplate], + logBase: 10, + min: null, + max: null, refId: 'A', step: 10, + } + for percentile in [0.5, 0.9, 0.99] + ], + yaxes: $.yaxes('s'), + seriesOverrides: [ + { + alias: 'p99', + color: '#FA6400', + fill: 1, + fillGradient: 1, }, { - expr: 'sum(rate(%s_sum{%s}[$interval])) by (job) * %s / sum(rate(%s_count{%s}[$interval])) by (job)' % [metricName, selector, multiplier, metricName, selector], - format: 'time_series', - intervalFactor: 2, - legendFormat: 'mean {{job}}', - refId: 'B', - step: 10, + alias: 'p90', + color: '#E0B400', + fill: 1, + fillGradient: 1, }, { - expr: 'histogram_quantile(0.50, sum(rate(%s_bucket{%s}[$interval])) by (job, le)) * %s' % 
[metricName, selector, multiplier], - format: 'time_series', - intervalFactor: 2, - legendFormat: 'P50 {{job}}', - refId: 'C', - step: 10, + alias: 'p50', + color: '#37872D', + fill: 10, + fillGradient: 0, }, ], - yaxes: $.yaxes('s'), }, - qpsErrTotalPanel(selectorErr, selectorTotal):: { - local expr(selector) = 'sum(rate(' + selector + '[$interval]))', // {{job}} + qpsErrTotalPanel(selectorErr, selectorTotal, dimensions):: { + local expr(selector) = 'sum by (%s) (rate(%s[$interval]))' % [dimensions, selector], aliasColors: { 'error': '#E24D42', @@ -119,8 +100,8 @@ local template = grafana.template; yaxes: $.yaxes({ format: 'percentunit' }), } + $.stack, - qpsSuccErrRatePanel(selectorErr, selectorTotal):: { - local expr(selector) = 'sum(rate(' + selector + '[$interval]))', // {{job}} + qpsSuccErrRatePanel(selectorErr, selectorTotal, dimensions):: { + local expr(selector) = 'sum by (%s) (rate(%s[$interval]))' % [dimensions, selector], aliasColors: { success: '#7EB26D', @@ -147,26 +128,26 @@ local template = grafana.template; yaxes: $.yaxes({ format: 'percentunit', max: 1 }), } + $.stack, - resourceUtilizationRow():: + resourceUtilizationRow(selector, dimensions):: $.row('Resources') .addPanel( $.panel('Memory Used') + $.queryPanel( [ - 'go_memstats_alloc_bytes{namespace="$namespace",job=~"$job"}', - 'go_memstats_heap_alloc_bytes{namespace="$namespace",job=~"$job"}', - 'rate(go_memstats_alloc_bytes_total{namespace="$namespace",job=~"$job"}[30s])', - 'rate(go_memstats_heap_alloc_bytes{namespace="$namespace",job=~"$job"}[30s])', - 'go_memstats_stack_inuse_bytes{namespace="$namespace",job=~"$job"}', - 'go_memstats_heap_inuse_bytes{namespace="$namespace",job=~"$job"}', + 'go_memstats_alloc_bytes{%s}' % selector, + 'go_memstats_heap_alloc_bytes{%s}' % selector, + 'rate(go_memstats_alloc_bytes_total{%s}[30s])' % selector, + 'rate(go_memstats_heap_alloc_bytes{%s}[30s])' % selector, + 'go_memstats_heap_inuse_bytes{%s}' % selector, + 'go_memstats_stack_inuse_bytes{%s}' % 
selector, ], [ 'alloc all {{instance}}', 'alloc heap {{instance}}', 'alloc rate all {{instance}}', 'alloc rate heap {{instance}}', - 'inuse stack {{instance}}', 'inuse heap {{instance}}', + 'inuse stack {{instance}}', ] ) + { yaxes: $.yaxes('bytes') }, @@ -174,14 +155,14 @@ local template = grafana.template; .addPanel( $.panel('Goroutines') + $.queryPanel( - 'go_goroutines{namespace="$namespace",job=~"$job"}', + 'go_goroutines{%s}' % selector, '{{instance}}' ) ) .addPanel( $.panel('GC Time Quantiles') + $.queryPanel( - 'go_gc_duration_seconds{namespace="$namespace",job=~"$job"}', + 'go_gc_duration_seconds{%s}' % selector, '{{quantile}} {{instance}}' ) ) + diff --git a/mixin/lib/thanos-grafana-builder/grpc.libsonnet b/mixin/lib/thanos-grafana-builder/grpc.libsonnet index c7eb4be5171..12f25ea3455 100644 --- a/mixin/lib/thanos-grafana-builder/grpc.libsonnet +++ b/mixin/lib/thanos-grafana-builder/grpc.libsonnet @@ -1,63 +1,46 @@ +local utils = import '../utils.libsonnet'; + { - grpcQpsPanel(type, selector):: { - local prefix = if type == 'client' then 'grpc_client' else 'grpc_server', + grpcRequestsPanel(metric, selector, dimensions):: { + local aggregatedLabels = std.split(dimensions, ','), + local dimensionsTemplate = std.join(' ', ['{{%s}}' % std.stripChars(label, ' ') for label in aggregatedLabels]), - aliasColors: { - Aborted: '#EAB839', - AlreadyExists: '#7EB26D', - FailedPrecondition: '#6ED0E0', - Unimplemented: '#6ED0E0', - InvalidArgument: '#EF843C', - NotFound: '#EF843C', - PermissionDenied: '#EF843C', - Unauthenticated: '#EF843C', - Canceled: '#E24D42', - DataLoss: '#E24D42', - DeadlineExceeded: '#E24D42', - Internal: '#E24D42', - OutOfRange: '#E24D42', - ResourceExhausted: '#E24D42', - Unavailable: '#E24D42', - Unknown: '#E24D42', - OK: '#7EB26D', - 'error': '#E24D42', - }, + seriesOverrides: [ + { alias: '/Aborted/', color: '#EAB839' }, + { alias: '/AlreadyExists/', color: '#37872D' }, + { alias: '/FailedPrecondition/', color: '#E0B400' }, + { alias: 
'/Unimplemented/', color: '#E0B400' }, + { alias: '/InvalidArgument/', color: '#1F60C4' }, + { alias: '/NotFound/', color: '#1F60C4' }, + { alias: '/PermissionDenied/', color: '#1F60C4' }, + { alias: '/Unauthenticated/', color: '#1F60C4' }, + { alias: '/Canceled/', color: '#C4162A' }, + { alias: '/DataLoss/', color: '#C4162A' }, + { alias: '/DeadlineExceeded/', color: '#C4162A' }, + { alias: '/Internal/', color: '#C4162A' }, + { alias: '/OutOfRange/', color: '#C4162A' }, + { alias: '/ResourceExhausted/', color: '#C4162A' }, + { alias: '/Unavailable/', color: '#C4162A' }, + { alias: '/Unknown/', color: '#C4162A' }, + { alias: '/OK/', color: '#37872D' }, + { alias: 'error', color: '#C4162A' }, + ], targets: [ { - expr: 'sum(rate(%s_handled_total{%s}[$interval])) by (job, grpc_method, grpc_code)' % [prefix, selector], + expr: 'sum by (%s) (rate(%s{%s}[$interval]))' % [utils.joinLabels(aggregatedLabels + ['grpc_method', 'grpc_code']), metric, selector], format: 'time_series', intervalFactor: 2, - legendFormat: '{{job}} {{grpc_method}} {{grpc_code}}', + legendFormat: dimensionsTemplate + ' {{grpc_method}} {{grpc_code}}', refId: 'A', step: 10, }, ], } + $.stack, - grpcErrorsPanel(type, selector):: - local prefix = if type == 'client' then 'grpc_client' else 'grpc_server'; + grpcErrorsPanel(metric, selector, dimensions):: $.qpsErrTotalPanel( - '%s_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss",%s}' % [prefix, selector], - '%s_started_total{%s}' % [prefix, selector], + '%s{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss",%s}' % [metric, selector], + '%s{%s}' % [metric, selector], + dimensions ), - - grpcLatencyPanel(type, selector, multiplier='1'):: - local prefix = if type == 'client' then 'grpc_client' else 'grpc_server'; - $.queryPanel( - [ - 'histogram_quantile(0.99, sum(rate(%s_handling_seconds_bucket{%s}[$interval])) by (job, grpc_method, le)) * %s' % [prefix, selector, multiplier], - ||| - 
sum(rate(%s_handling_seconds_sum{%s}[$interval])) by (job) * %s - / - sum(rate(%s_handling_seconds_count{%s}[$interval])) by (job) - ||| % [prefix, selector, multiplier, prefix, selector], - 'histogram_quantile(0.50, sum(rate(%s_handling_seconds_bucket{%s}[$interval])) by (job, grpc_method, le)) * %s' % [prefix, selector, multiplier], - ], - [ - 'P99 {{job}} {{grpc_method}}', - 'mean {{job}} {{grpc_method}}', - 'P50 {{job}} {{grpc_method}}', - ] - ) + - { yaxes: $.yaxes('s') }, } diff --git a/mixin/lib/thanos-grafana-builder/http.libsonnet b/mixin/lib/thanos-grafana-builder/http.libsonnet index 3d1720cbf00..d1962cccc31 100644 --- a/mixin/lib/thanos-grafana-builder/http.libsonnet +++ b/mixin/lib/thanos-grafana-builder/http.libsonnet @@ -1,29 +1,34 @@ +local utils = import '../utils.libsonnet'; + { - httpQpsPanel(metricName, selector):: { - aliasColors: { - '1xx': '#EAB839', - '2xx': '#7EB26D', - '3xx': '#6ED0E0', - '4xx': '#EF843C', - '5xx': '#E24D42', - success: '#7EB26D', - 'error': '#E24D42', - }, + httpQpsPanel(metric, selector, dimensions):: { + local aggregatedLabels = std.split(dimensions, ','), + local dimensionsTemplate = std.join(' ', ['{{%s}}' % std.stripChars(label, ' ') for label in aggregatedLabels]), + + seriesOverrides: [ + { alias: '/1../', color: '#EAB839' }, + { alias: '/2../', color: '#37872D' }, + { alias: '/3../', color: '#E0B400' }, + { alias: '/4../', color: '#1F60C4' }, + { alias: '/5../', color: '#C4162A' }, + ], + targets: [ { - expr: 'sum(label_replace(rate(%s{%s}[$interval]),"status_code", "${1}xx", "code", "([0-9])..")) by (job, handler, status_code)' % [metricName, selector], + expr: 'sum by (%s) (rate(%s{%s}[$interval]))' % [utils.joinLabels(aggregatedLabels + ['handler', 'code']), metric, selector], format: 'time_series', intervalFactor: 2, - legendFormat: '{{job}} {{handler}} {{status_code}}', + legendFormat: dimensionsTemplate + ' {{handler}} {{code}}', refId: 'A', step: 10, }, ], } + $.stack, - httpErrPanel(metricName, selector):: 
+ httpErrPanel(metric, selector, dimensions):: $.qpsErrTotalPanel( - '%s{%s,code=~"5.."}' % [metricName, selector], - '%s{%s}' % [metricName, selector], + '%s{%s,code=~"5.."}' % [metric, selector], + '%s{%s}' % [metric, selector], + dimensions ), } diff --git a/mixin/lib/thanos-grafana-builder/slo.libsonnet b/mixin/lib/thanos-grafana-builder/slo.libsonnet index bee885037a6..80acca3cfc7 100644 --- a/mixin/lib/thanos-grafana-builder/slo.libsonnet +++ b/mixin/lib/thanos-grafana-builder/slo.libsonnet @@ -1,9 +1,14 @@ +local utils = import '../utils.libsonnet'; + { - sloLatency(title, description, selector, quantile, warning, critical):: + sloLatency(title, description, selector, dimensions, quantile, warning, critical):: + local aggregatedLabels = std.split(dimensions, ','); + local dimensionsTemplate = std.join(' ', ['{{%s}}' % std.stripChars(label, ' ') for label in aggregatedLabels]); + $.panel(title, description) + $.queryPanel( - 'histogram_quantile(%.2f, sum(rate(%s[$interval])) by (job, le))' % [quantile, selector], - '{{job}} P' + quantile * 100 + 'histogram_quantile(%.2f, sum by (%s) (rate(%s[$interval])))' % [quantile, utils.joinLabels(aggregatedLabels + ['le']), selector], + dimensionsTemplate + ' P' + quantile * 100 ) + { yaxes: $.yaxes('s'), diff --git a/mixin/lib/utils.libsonnet b/mixin/lib/utils.libsonnet index 700ada95a67..f5700196ea4 100644 --- a/mixin/lib/utils.libsonnet +++ b/mixin/lib/utils.libsonnet @@ -10,4 +10,6 @@ for group in super.groups ], }, + + joinLabels(labels): std.join(', ', std.filter(function(x) std.length(std.stripChars(x, ' ')) > 0, labels)), } diff --git a/mixin/rules/bucket_replicate.libsonnet b/mixin/rules/bucket_replicate.libsonnet index 14eb5c945d4..a25c9f838b9 100644 --- a/mixin/rules/bucket_replicate.libsonnet +++ b/mixin/rules/bucket_replicate.libsonnet @@ -7,8 +7,7 @@ groups+: [ { name: 'thanos-bucket-replicate.rules', - rules: [ - ], + rules: [], }, ], }, diff --git a/mixin/rules/query.libsonnet 
b/mixin/rules/query.libsonnet index 062af2a6766..44f7d13bded 100644 --- a/mixin/rules/query.libsonnet +++ b/mixin/rules/query.libsonnet @@ -2,6 +2,7 @@ local thanos = self, query+:: { selector: error 'must provide selector for Thanos Query recording rules', + dimensions: std.join(', ', std.objectFields(thanos.targetGroups) + ['job']), }, prometheusRules+:: { groups+: [ @@ -12,9 +13,9 @@ record: ':grpc_client_failures_per_unary:sum_rate', expr: ||| ( - sum(rate(grpc_client_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s, grpc_type="unary"}[5m])) + sum by (%(dimensions)s) (rate(grpc_client_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s, grpc_type="unary"}[5m])) / - sum(rate(grpc_client_started_total{%(selector)s, grpc_type="unary"}[5m])) + sum by (%(dimensions)s) (rate(grpc_client_started_total{%(selector)s, grpc_type="unary"}[5m])) ) ||| % thanos.query, }, @@ -22,9 +23,9 @@ record: ':grpc_client_failures_per_stream:sum_rate', expr: ||| ( - sum(rate(grpc_client_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s, grpc_type="server_stream"}[5m])) + sum by (%(dimensions)s) (rate(grpc_client_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s, grpc_type="server_stream"}[5m])) / - sum(rate(grpc_client_started_total{%(selector)s, grpc_type="server_stream"}[5m])) + sum by (%(dimensions)s) (rate(grpc_client_started_total{%(selector)s, grpc_type="server_stream"}[5m])) ) ||| % thanos.query, }, @@ -32,9 +33,9 @@ record: ':thanos_query_store_apis_dns_failures_per_lookup:sum_rate', expr: ||| ( - sum(rate(thanos_query_store_apis_dns_failures_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_query_store_apis_dns_failures_total{%(selector)s}[5m])) / - 
sum(rate(thanos_query_store_apis_dns_lookups_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_query_store_apis_dns_lookups_total{%(selector)s}[5m])) ) ||| % thanos.query, }, @@ -42,7 +43,7 @@ record: ':query_duration_seconds:histogram_quantile', expr: ||| histogram_quantile(0.99, - sum(rate(http_request_duration_seconds_bucket{%(selector)s, handler="query"}[5m])) by (le) + sum by (%(dimensions)s, le) (rate(http_request_duration_seconds_bucket{%(selector)s, handler="query"}[5m])) ) ||| % thanos.query, labels: { @@ -53,7 +54,7 @@ record: ':api_range_query_duration_seconds:histogram_quantile', expr: ||| histogram_quantile(0.99, - sum(rate(http_request_duration_seconds_bucket{%(selector)s, handler="query_range"}[5m])) by (le) + sum by (%(dimensions)s, le) (rate(http_request_duration_seconds_bucket{%(selector)s, handler="query_range"}[5m])) ) ||| % thanos.query, labels: { diff --git a/mixin/rules/receive.libsonnet b/mixin/rules/receive.libsonnet index 1017ce4fa11..fcf3b6429cb 100644 --- a/mixin/rules/receive.libsonnet +++ b/mixin/rules/receive.libsonnet @@ -2,6 +2,7 @@ local thanos = self, receive+:: { selector: error 'must provide selector for Thanos Receive recording rules', + dimensions: std.join(', ', std.objectFields(thanos.targetGroups) + ['job']), }, prometheusRules+:: { groups+: [ @@ -11,30 +12,30 @@ { record: ':grpc_server_failures_per_unary:sum_rate', expr: ||| - sum( - rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s, grpc_type="unary"}[5m]) + ( + sum by (%(dimensions)s) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s, grpc_type="unary"}[5m])) / - rate(grpc_server_started_total{%(selector)s, grpc_type="unary"}[5m]) + sum by (%(dimensions)s) (rate(grpc_server_started_total{%(selector)s, grpc_type="unary"}[5m])) ) ||| % thanos.receive, }, { record: 
':grpc_server_failures_per_stream:sum_rate', expr: ||| - sum( - rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s, grpc_type="server_stream"}[5m]) + ( + sum by (%(dimensions)s) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s, grpc_type="server_stream"}[5m])) / - rate(grpc_server_started_total{%(selector)s, grpc_type="server_stream"}[5m]) + sum by (%(dimensions)s) (rate(grpc_server_started_total{%(selector)s, grpc_type="server_stream"}[5m])) ) ||| % thanos.receive, }, { record: ':http_failure_per_request:sum_rate', expr: ||| - sum( - rate(http_requests_total{handler="receive", %(selector)s, code!~"5.."}[5m]) + ( + sum by (%(dimensions)s) (rate(http_requests_total{handler="receive", %(selector)s, code!~"5.."}[5m])) / - rate(http_requests_total{handler="receive", %(selector)s}[5m]) + sum by (%(dimensions)s) (rate(http_requests_total{handler="receive", %(selector)s}[5m])) ) ||| % thanos.receive, }, @@ -42,7 +43,7 @@ record: ':http_request_duration_seconds:histogram_quantile', expr: ||| histogram_quantile(0.99, - sum(rate(http_request_duration_seconds_bucket{handler="receive", %(selector)s}[5m])) by (le) + sum by (%(dimensions)s, le) (rate(http_request_duration_seconds_bucket{handler="receive", %(selector)s}[5m])) ) ||| % thanos.receive, labels: { @@ -53,9 +54,9 @@ record: ':thanos_receive_replication_failure_per_requests:sum_rate', expr: ||| ( - sum(rate(thanos_receive_replications_total{result="error", %(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_receive_replications_total{result="error", %(selector)s}[5m])) / - sum(rate(thanos_receive_replications_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_receive_replications_total{%(selector)s}[5m])) ) ||| % thanos.receive, }, @@ -63,9 +64,9 @@ record: ':thanos_receive_forward_failure_per_requests:sum_rate', expr: ||| ( - 
sum(rate(thanos_receive_forward_requests_total{result="error", %(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_receive_forward_requests_total{result="error", %(selector)s}[5m])) / - sum(rate(thanos_receive_forward_requests_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_receive_forward_requests_total{%(selector)s}[5m])) ) ||| % thanos.receive, }, @@ -73,9 +74,9 @@ record: ':thanos_receive_hashring_file_failure_per_refresh:sum_rate', expr: ||| ( - sum(rate(thanos_receive_hashrings_file_errors_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_receive_hashrings_file_errors_total{%(selector)s}[5m])) / - sum(rate(thanos_receive_hashrings_file_refreshes_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_receive_hashrings_file_refreshes_total{%(selector)s}[5m])) ) ||| % thanos.receive, }, diff --git a/mixin/rules/store.libsonnet b/mixin/rules/store.libsonnet index 14574d1ef36..249c5923cb3 100644 --- a/mixin/rules/store.libsonnet +++ b/mixin/rules/store.libsonnet @@ -2,6 +2,7 @@ local thanos = self, store+:: { selector: error 'must provide selector for Thanos Store recording rules', + dimensions: std.join(', ', std.objectFields(thanos.targetGroups) + ['job']), }, prometheusRules+:: { groups+: [ @@ -12,9 +13,9 @@ record: ':grpc_server_failures_per_unary:sum_rate', expr: ||| ( - sum(rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s, grpc_type="unary"}[5m])) + sum by (%(dimensions)s) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s, grpc_type="unary"}[5m])) / - sum(rate(grpc_server_started_total{%(selector)s, grpc_type="unary"}[5m])) + sum by (%(dimensions)s) (rate(grpc_server_started_total{%(selector)s, grpc_type="unary"}[5m])) ) ||| % thanos.store, }, @@ -22,9 +23,9 @@ record: ':grpc_server_failures_per_stream:sum_rate', expr: ||| ( - 
sum(rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s, grpc_type="server_stream"}[5m])) + sum by (%(dimensions)s) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", %(selector)s, grpc_type="server_stream"}[5m])) / - sum(rate(grpc_server_started_total{%(selector)s, grpc_type="server_stream"}[5m])) + sum by (%(dimensions)s) (rate(grpc_server_started_total{%(selector)s, grpc_type="server_stream"}[5m])) ) ||| % thanos.store, }, @@ -32,9 +33,9 @@ record: ':thanos_objstore_bucket_failures_per_operation:sum_rate', expr: ||| ( - sum(rate(thanos_objstore_bucket_operation_failures_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operation_failures_total{%(selector)s}[5m])) / - sum(rate(thanos_objstore_bucket_operations_total{%(selector)s}[5m])) + sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operations_total{%(selector)s}[5m])) ) ||| % thanos.store, }, @@ -42,7 +43,7 @@ record: ':thanos_objstore_bucket_operation_duration_seconds:histogram_quantile', expr: ||| histogram_quantile(0.99, - sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{%(selector)s}[5m])) by (le) + sum by (%(dimensions)s, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{%(selector)s}[5m])) ) ||| % thanos.store, labels: { diff --git a/mixin/runbook.md b/mixin/runbook.md index 78da401bcde..81437de51ba 100755 --- a/mixin/runbook.md +++ b/mixin/runbook.md @@ -15,85 +15,84 @@ |Name|Summary|Description|Severity|Runbook| |---|---|---|---|---| -|ThanosBucketReplicateIsDown|Thanos Replicate has disappeared from Prometheus target discovery.|Thanos Replicate has disappeared from Prometheus target 
discovery.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateisdown)| -|ThanosBucketReplicateErrorRate|Thanose Replicate is failing to run.|Thanos Replicate failing to run, {{ $value humanize }}% of attempts failed.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateerrorrate](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateerrorrate)| -|ThanosBucketReplicateRunLatency|Thanos Replicate has a high latency for replicate operations.|Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for the replicate operations.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicaterunlatency](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicaterunlatency)| +|ThanosBucketReplicateErrorRate|Thanos Replicate is failing to run.|Thanos Replicate is failing to run, {{$value humanize}}% of attempts failed.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateerrorrate](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateerrorrate)| +|ThanosBucketReplicateRunLatency|Thanos Replicate has a high latency for replicate operations.|Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{$value}} seconds for the replicate operations.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicaterunlatency](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicaterunlatency)| ## thanos-compact |Name|Summary|Description|Severity|Runbook| 
|---|---|---|---|---| -|ThanosCompactMultipleRunning|Thanos Compact has multiple instances running.|No more than one Thanos Compact instance should be running at once. There are {{ $value }}|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactmultiplerunning](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactmultiplerunning)| -|ThanosCompactHalted|Thanos Compact has failed to run ans is now halted.|Thanos Compact {{$labels.job}} has failed to run and now is halted.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthalted](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthalted)| -|ThanosCompactHighCompactionFailures|Thanos Compact is failing to execute compactions.|Thanos Compact {{$labels.job}} is failing to execute {{ $value humanize }}% of compactions.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthighcompactionfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthighcompactionfailures)| -|ThanosCompactBucketHighOperationFailures|Thanos Compact Bucket is having a high number of operation failures.|Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value humanize }}% of operations.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactbuckethighoperationfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactbuckethighoperationfailures)| -|ThanosCompactHasNotRun|Thanos Compact has not uploaded anything for last 24 hours.|Thanos Compact {{$labels.job}} has not uploaded anything for 24 
hours.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthasnotrun](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthasnotrun)| +|ThanosCompactMultipleRunning|Thanos Compact has multiple instances running.|No more than one Thanos Compact instance should be running at once. There are {{$value}} |warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactmultiplerunning](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactmultiplerunning)| +|ThanosCompactHalted|Thanos Compact has failed to run ans is now halted.|Thanos Compact {{$labels.job}} has failed to run and now is halted.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthalted](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthalted)| +|ThanosCompactHighCompactionFailures|Thanos Compact is failing to execute compactions.|Thanos Compact {{$labels.job}} , is failing to execute {{$value humanize}}% of compactions.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthighcompactionfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthighcompactionfailures)| +|ThanosCompactBucketHighOperationFailures|Thanos Compact Bucket is having a high number of operation failures.|Thanos Compact {{$labels.job}} , Bucket is failing to execute {{$value humanize}}% of operations.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactbuckethighoperationfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactbuckethighoperationfailures)| +|ThanosCompactHasNotRun|Thanos Compact has not uploaded anything for last 24 hours.|Thanos Compact 
{{$labels.job}} has not uploaded anything for 24 hours.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthasnotrun](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompacthasnotrun)| ## thanos-component-absent |Name|Summary|Description|Severity|Runbook| |---|---|---|---|---| -|ThanosBucketReplicateIsDown|thanos component has disappeared from Prometheus target discovery.|ThanosBucketReplicate has disappeared from Prometheus target discovery.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateisdown)| -|ThanosCompactIsDown|thanos component has disappeared from Prometheus target discovery.|ThanosCompact has disappeared from Prometheus target discovery.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactisdown)| -|ThanosQueryIsDown|thanos component has disappeared from Prometheus target discovery.|ThanosQuery has disappeared from Prometheus target discovery.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryisdown)| -|ThanosReceiveIsDown|thanos component has disappeared from Prometheus target discovery.|ThanosReceive has disappeared from Prometheus target discovery.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceiveisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceiveisdown)| -|ThanosRuleIsDown|thanos component has disappeared from Prometheus target discovery.|ThanosRule has disappeared from 
Prometheus target discovery.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleisdown)| -|ThanosSidecarIsDown|thanos component has disappeared from Prometheus target discovery.|ThanosSidecar has disappeared from Prometheus target discovery.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarisdown)| -|ThanosStoreIsDown|thanos component has disappeared from Prometheus target discovery.|ThanosStore has disappeared from Prometheus target discovery.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreisdown)| +|ThanosBucketReplicateIsDown|Thanos component has disappeared.|ThanosBucketReplicate has disappeared. Prometheus target for the component cannot be discovered.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosbucketreplicateisdown)| +|ThanosCompactIsDown|Thanos component has disappeared.|ThanosCompact has disappeared. Prometheus target for the component cannot be discovered.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanoscompactisdown)| +|ThanosQueryIsDown|Thanos component has disappeared.|ThanosQuery has disappeared. 
Prometheus target for the component cannot be discovered.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryisdown)| +|ThanosReceiveIsDown|Thanos component has disappeared.|ThanosReceive has disappeared. Prometheus target for the component cannot be discovered.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceiveisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceiveisdown)| +|ThanosRuleIsDown|Thanos component has disappeared.|ThanosRule has disappeared. Prometheus target for the component cannot be discovered.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleisdown)| +|ThanosSidecarIsDown|Thanos component has disappeared.|ThanosSidecar has disappeared. Prometheus target for the component cannot be discovered.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarisdown)| +|ThanosStoreIsDown|Thanos component has disappeared.|ThanosStore has disappeared. 
Prometheus target for the component cannot be discovered.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreisdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreisdown)| ## thanos-query |Name|Summary|Description|Severity|Runbook| |---|---|---|---|---| -|ThanosQueryHttpRequestQueryErrorRateHigh|Thanos Query is failing to handle requests.|Thanos Query {{$labels.job}} is failing to handle {{ $value humanize }}% of "query" requests.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhttprequestqueryerrorratehigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhttprequestqueryerrorratehigh)| -|ThanosQueryHttpRequestQueryRangeErrorRateHigh|Thanos Query is failing to handle requests.|Thanos Query {{$labels.job}} is failing to handle {{ $value humanize }}% of "query_range" requests.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhttprequestqueryrangeerrorratehigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhttprequestqueryrangeerrorratehigh)| -|ThanosQueryGrpcServerErrorRate|Thanos Query is failing to handle requests.|Thanos Query {{$labels.job}} is failing to handle {{ $value humanize }}% of requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosquerygrpcservererrorrate](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosquerygrpcservererrorrate)| -|ThanosQueryGrpcClientErrorRate|Thanos Query is failing to send requests.|Thanos Query {{$labels.job}} is failing to send {{ $value humanize }}% of 
requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosquerygrpcclienterrorrate](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosquerygrpcclienterrorrate)| -|ThanosQueryHighDNSFailures|Thanos Query is having high number of DNS failures.|Thanos Query {{$labels.job}} have {{ $value humanize }}% of failing DNS queries for store endpoints.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhighdnsfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhighdnsfailures)| -|ThanosQueryInstantLatencyHigh|Thanos Query has high latency for queries.|Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for instant queries.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryinstantlatencyhigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryinstantlatencyhigh)| -|ThanosQueryRangeLatencyHigh|Thanos Query has high latency for queries.|Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for range queries.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryrangelatencyhigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryrangelatencyhigh)| +|ThanosQueryHttpRequestQueryErrorRateHigh|Thanos Query is failing to handle requests.|Thanos Query {{$labels.job}} is failing to handle {{$value humanize}}% of "query" requests.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhttprequestqueryerrorratehigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhttprequestqueryerrorratehigh)| +|ThanosQueryHttpRequestQueryRangeErrorRateHigh|Thanos Query is failing to 
handle requests.|Thanos Query {{$labels.job}} is failing to handle {{$value humanize}}% of "query_range" requests.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhttprequestqueryrangeerrorratehigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhttprequestqueryrangeerrorratehigh)| +|ThanosQueryGrpcServerErrorRate|Thanos Query is failing to handle requests.|Thanos Query {{$labels.job}} is failing to handle {{$value humanize}}% of requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosquerygrpcservererrorrate](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosquerygrpcservererrorrate)| +|ThanosQueryGrpcClientErrorRate|Thanos Query is failing to send requests.|Thanos Query {{$labels.job}} is failing to send {{$value humanize}}% of requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosquerygrpcclienterrorrate](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosquerygrpcclienterrorrate)| +|ThanosQueryHighDNSFailures|Thanos Query is having high number of DNS failures.|Thanos Query {{$labels.job}} have {{$value humanize}}% of failing DNS queries for store endpoints.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhighdnsfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryhighdnsfailures)| +|ThanosQueryInstantLatencyHigh|Thanos Query has high latency for queries.|Thanos Query {{$labels.job}} has a 99th percentile latency of {{$value}} seconds for instant 
queries.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryinstantlatencyhigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryinstantlatencyhigh)| +|ThanosQueryRangeLatencyHigh|Thanos Query has high latency for queries.|Thanos Query {{$labels.job}} has a 99th percentile latency of {{$value}} seconds for range queries.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryrangelatencyhigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryrangelatencyhigh)| ## thanos-receive |Name|Summary|Description|Severity|Runbook| |---|---|---|---|---| -|ThanosReceiveHttpRequestErrorRateHigh|Thanos Receive is failing to handle requests.|Thanos Receive {{$labels.job}} is failing to handle {{ $value humanize }}% of requests.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehttprequesterrorratehigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehttprequesterrorratehigh)| +|ThanosReceiveHttpRequestErrorRateHigh|Thanos Receive is failing to handle requests.|Thanos Receive {{$labels.job}} is failing to handle {{$value humanize}}% of requests.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehttprequesterrorratehigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehttprequesterrorratehigh)| |ThanosReceiveHttpRequestLatencyHigh|Thanos Receive has high HTTP requests latency.|Thanos Receive {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for 
requests.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehttprequestlatencyhigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehttprequestlatencyhigh)| -|ThanosReceiveHighReplicationFailures|Thanos Receive is having high number of replication failures.|Thanos Receive {{$labels.job}} is failing to replicate {{ $value humanize }}% of requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighreplicationfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighreplicationfailures)| -|ThanosReceiveHighForwardRequestFailures|Thanos Receive is failing to forward requests.|Thanos Receive {{$labels.job}} is failing to forward {{ $value humanize }}% of requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighforwardrequestfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighforwardrequestfailures)| -|ThanosReceiveHighHashringFileRefreshFailures|Thanos Receive is failing to refresh hasring file.|Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{ $value humanize }} of attempts failed.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighhashringfilerefreshfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighhashringfilerefreshfailures)| +|ThanosReceiveHighReplicationFailures|Thanos Receive is having high number of replication failures.|Thanos Receive {{$labels.job}} is failing to replicate {{$value humanize}}% of 
requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighreplicationfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighreplicationfailures)| +|ThanosReceiveHighForwardRequestFailures|Thanos Receive is failing to forward requests.|Thanos Receive {{$labels.job}} is failing to forward {{$value humanize}}% of requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighforwardrequestfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighforwardrequestfailures)| +|ThanosReceiveHighHashringFileRefreshFailures|Thanos Receive is failing to refresh hashring file.|Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{$value humanize}} of attempts failed.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighhashringfilerefreshfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivehighhashringfilerefreshfailures)| |ThanosReceiveConfigReloadFailure|Thanos Receive has not been able to reload configuration.|Thanos Receive {{$labels.job}} has not been able to reload hashring configurations.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceiveconfigreloadfailure](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceiveconfigreloadfailure)| -|ThanosReceiveNoUpload|Thanos Receive has not uploaded latest data to object storage.|Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded latest data to object storage.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivenoupload](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivenoupload)| 
+|ThanosReceiveNoUpload|Thanos Receive has not uploaded latest data to object storage.|Thanos Receive {{$labels.instance}} has not uploaded latest data to object storage.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivenoupload](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosreceivenoupload)| ## thanos-rule |Name|Summary|Description|Severity|Runbook| |---|---|---|---|---| -|ThanosRuleQueueIsDroppingAlerts|Thanos Rule is failing to queue alerts.|Thanos Rule {{$labels.job}} is failing to queue alerts.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulequeueisdroppingalerts](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulequeueisdroppingalerts)| -|ThanosRuleSenderIsFailingAlerts|Thanos Rule is failing to send alerts to alertmanager.|Thanos Rule {{$labels.job}} is failing to send alerts to alertmanager.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulesenderisfailingalerts](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulesenderisfailingalerts)| -|ThanosRuleHighRuleEvaluationFailures|Thanos Rule is failing to evaluate rules.|Thanos Rule {{$labels.job}} is failing to evaluate rules.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulehighruleevaluationfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulehighruleevaluationfailures)| -|ThanosRuleHighRuleEvaluationWarnings|Thanos Rule has high number of evaluation warnings.|Thanos Rule {{$labels.job}} has high number of evaluation 
warnings.|info|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulehighruleevaluationwarnings](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulehighruleevaluationwarnings)| -|ThanosRuleRuleEvaluationLatencyHigh|Thanos Rule has high rule evaluation latency.|Thanos Rule {{$labels.job}}/{{$labels.instance}} has higher evaluation latency than interval for {{$labels.rule_group}}.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleruleevaluationlatencyhigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleruleevaluationlatencyhigh)| -|ThanosRuleGrpcErrorRate|Thanos Rule is failing to handle grpc requests.|Thanos Rule {{$labels.job}} is failing to handle {{ $value humanize }}% of requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulegrpcerrorrate](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulegrpcerrorrate)| +|ThanosRuleQueueIsDroppingAlerts|Thanos Rule is failing to queue alerts.|Thanos Rule {{$labels.instance}} is failing to queue alerts.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulequeueisdroppingalerts](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulequeueisdroppingalerts)| +|ThanosRuleSenderIsFailingAlerts|Thanos Rule is failing to send alerts to alertmanager.|Thanos Rule {{$labels.instance}} is failing to send alerts to alertmanager.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulesenderisfailingalerts](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulesenderisfailingalerts)| +|ThanosRuleHighRuleEvaluationFailures|Thanos Rule is failing to evaluate rules.|Thanos Rule {{$labels.instance}} is failing 
to evaluate rules.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulehighruleevaluationfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulehighruleevaluationfailures)| +|ThanosRuleHighRuleEvaluationWarnings|Thanos Rule has high number of evaluation warnings.|Thanos Rule {{$labels.instance}} has high number of evaluation warnings.|info|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulehighruleevaluationwarnings](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulehighruleevaluationwarnings)| +|ThanosRuleRuleEvaluationLatencyHigh|Thanos Rule has high rule evaluation latency.|Thanos Rule {{$labels.instance}} has higher evaluation latency than interval for {{$labels.rule_group}}.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleruleevaluationlatencyhigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleruleevaluationlatencyhigh)| +|ThanosRuleGrpcErrorRate|Thanos Rule is failing to handle grpc requests.|Thanos Rule {{$labels.job}} is failing to handle {{$value humanize}}% of requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulegrpcerrorrate](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulegrpcerrorrate)| |ThanosRuleConfigReloadFailure|Thanos Rule has not been able to reload configuration.|Thanos Rule {{$labels.job}} has not been able to reload its configuration.|info|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleconfigreloadfailure](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosruleconfigreloadfailure)| -|ThanosRuleQueryHighDNSFailures|Thanos Rule is having high number of DNS failures.|Thanos Rule 
{{$labels.job}} has {{ $value humanize }}% of failing DNS queries for query endpoints.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulequeryhighdnsfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulequeryhighdnsfailures)| -|ThanosRuleAlertmanagerHighDNSFailures|Thanos Rule is having high number of DNS failures.|Thanos Rule {{$labels.job}} has {{ $value humanize }}% of failing DNS queries for Alertmanager endpoints.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulealertmanagerhighdnsfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulealertmanagerhighdnsfailures)| -|ThanosRuleNoEvaluationFor10Intervals|Thanos Rule has rule groups that did not evaluate for 10 intervals.|Thanos Rule {{$labels.job}} has {{ $value humanize }}% rule groups that did not evaluate for at least 10x of their expected interval.|info|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulenoevaluationfor10intervals](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulenoevaluationfor10intervals)| -|ThanosNoRuleEvaluations|Thanos Rule did not perform any rule evaluations.|Thanos Rule {{$labels.job}} did not perform any rule evaluations in the past 2 minutes.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosnoruleevaluations](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosnoruleevaluations)| +|ThanosRuleQueryHighDNSFailures|Thanos Rule is having high number of DNS failures.|Thanos Rule {{$labels.job}} has {{$value humanize}}% of failing DNS queries for query 
endpoints.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulequeryhighdnsfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulequeryhighdnsfailures)| +|ThanosRuleAlertmanagerHighDNSFailures|Thanos Rule is having high number of DNS failures.|Thanos Rule {{$labels.instance}} has {{$value humanize}}% of failing DNS queries for Alertmanager endpoints.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulealertmanagerhighdnsfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulealertmanagerhighdnsfailures)| +|ThanosRuleNoEvaluationFor10Intervals|Thanos Rule has rule groups that did not evaluate for 10 intervals.|Thanos Rule {{$labels.job}} has {{$value humanize}}% rule groups that did not evaluate for at least 10x of their expected interval.|info|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulenoevaluationfor10intervals](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosrulenoevaluationfor10intervals)| +|ThanosNoRuleEvaluations|Thanos Rule did not perform any rule evaluations.|Thanos Rule {{$labels.instance}} did not perform any rule evaluations in the past 10 minutes.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosnoruleevaluations](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosnoruleevaluations)| ## thanos-sidecar |Name|Summary|Description|Severity|Runbook| |---|---|---|---|---| -|ThanosSidecarPrometheusDown|Thanos Sidecar cannot connect to Prometheus|Thanos Sidecar {{$labels.job}} {{$labels.instance}} cannot connect to 
Prometheus.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarprometheusdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarprometheusdown)| -|ThanosSidecarBucketOperationsFailed|Thanos Sidecar bucket operations are failing|Thanos Sidecar {{$labels.job}} {{$labels.instance}} bucket operations are failing|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed)| -|ThanosSidecarUnhealthy|Thanos Sidecar is unhealthy.|Thanos Sidecar {{$labels.job}} {{$labels.instance}} is unhealthy for more than {{$value}} seconds.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy)| +|ThanosSidecarPrometheusDown|Thanos Sidecar cannot connect to Prometheus|Thanos Sidecar {{$labels.instance}} cannot connect to Prometheus.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarprometheusdown](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarprometheusdown)| +|ThanosSidecarBucketOperationsFailed|Thanos Sidecar bucket operations are failing|Thanos Sidecar {{$labels.instance}} bucket operations are failing|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed)| +|ThanosSidecarUnhealthy|Thanos Sidecar is unhealthy.|Thanos Sidecar {{$labels.instance}} is unhealthy for {{$value}} 
seconds.|critical|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarunhealthy)| ## thanos-store |Name|Summary|Description|Severity|Runbook| |---|---|---|---|---| -|ThanosStoreGrpcErrorRate|Thanos Store is failing to handle qrpcd requests.|Thanos Store {{$labels.job}} is failing to handle {{ $value humanize }}% of requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoregrpcerrorrate](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoregrpcerrorrate)| -|ThanosStoreSeriesGateLatencyHigh|Thanos Store has high latency for store series gate requests.|Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for store series gate requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreseriesgatelatencyhigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreseriesgatelatencyhigh)| -|ThanosStoreBucketHighOperationFailures|Thanos Store Bucket is failing to execute operations.|Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value humanize }}% of operations.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstorebuckethighoperationfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstorebuckethighoperationfailures)| -|ThanosStoreObjstoreOperationLatencyHigh|Thanos Store is having high latency for bucket operations.|Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of {{ $value }} seconds for the bucket 
operations.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreobjstoreoperationlatencyhigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreobjstoreoperationlatencyhigh)| +|ThanosStoreGrpcErrorRate|Thanos Store is failing to handle grpc requests.|Thanos Store {{$labels.job}} is failing to handle {{$value humanize}}% of requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoregrpcerrorrate](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoregrpcerrorrate)| +|ThanosStoreSeriesGateLatencyHigh|Thanos Store has high latency for store series gate requests.|Thanos Store {{$labels.job}} has a 99th percentile latency of {{$value}} seconds for store series gate requests.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreseriesgatelatencyhigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreseriesgatelatencyhigh)| +|ThanosStoreBucketHighOperationFailures|Thanos Store Bucket is failing to execute operations.|Thanos Store {{$labels.job}} Bucket is failing to execute {{$value humanize}}% of operations.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstorebuckethighoperationfailures](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstorebuckethighoperationfailures)| +|ThanosStoreObjstoreOperationLatencyHigh|Thanos Store is having high latency for bucket operations.|Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of {{$value}} seconds for the bucket 
operations.|warning|[https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreobjstoreoperationlatencyhigh](https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosstoreobjstoreoperationlatencyhigh)| diff --git a/pkg/api/query/v1.go b/pkg/api/query/v1.go index 00671eb85f3..3852ca0abea 100644 --- a/pkg/api/query/v1.go +++ b/pkg/api/query/v1.go @@ -28,6 +28,7 @@ import ( "strings" "time" + cortexutil "github.com/cortexproject/cortex/pkg/util" "github.com/go-kit/kit/log" "github.com/opentracing/opentracing-go" "github.com/pkg/errors" @@ -40,6 +41,8 @@ import ( "github.com/prometheus/prometheus/storage" "github.com/thanos-io/thanos/pkg/api" + "github.com/thanos-io/thanos/pkg/exemplars" + "github.com/thanos-io/thanos/pkg/exemplars/exemplarspb" extpromhttp "github.com/thanos-io/thanos/pkg/extprom/http" "github.com/thanos-io/thanos/pkg/gate" "github.com/thanos-io/thanos/pkg/logging" @@ -73,11 +76,13 @@ type QueryAPI struct { queryEngine func(int64) *promql.Engine ruleGroups rules.UnaryClient metadatas metadata.UnaryClient + exemplars exemplars.UnaryClient enableAutodownsampling bool enableQueryPartialResponse bool enableRulePartialResponse bool enableMetricMetadataPartialResponse bool + enableExemplarPartialResponse bool disableCORS bool replicaLabels []string @@ -96,6 +101,7 @@ func NewQueryAPI( c query.QueryableCreator, ruleGroups rules.UnaryClient, metadatas metadata.UnaryClient, + exemplars exemplars.UnaryClient, enableAutodownsampling bool, enableQueryPartialResponse bool, enableRulePartialResponse bool, @@ -116,6 +122,7 @@ func NewQueryAPI( gate: gate, ruleGroups: ruleGroups, metadatas: metadatas, + exemplars: exemplars, enableAutodownsampling: enableAutodownsampling, enableQueryPartialResponse: enableQueryPartialResponse, @@ -155,6 +162,8 @@ func (qapi *QueryAPI) Register(r *route.Router, tracer 
opentracing.Tracer, logge r.Get("/rules", instr("rules", NewRulesHandler(qapi.ruleGroups, qapi.enableRulePartialResponse))) r.Get("/metadata", instr("metadata", NewMetricMetadataHandler(qapi.metadatas, qapi.enableMetricMetadataPartialResponse))) + + r.Get("/query_exemplars", instr("exemplars", NewExemplarsHandler(qapi.exemplars, qapi.enableExemplarPartialResponse))) } type queryData struct { @@ -683,6 +692,38 @@ func NewRulesHandler(client rules.UnaryClient, enablePartialResponse bool) func( } } +// NewExemplarsHandler creates handler compatible with HTTP /api/v1/exemplars [link-to-be-added] +// which uses gRPC Unary Exemplars API. +func NewExemplarsHandler(client exemplars.UnaryClient, enablePartialResponse bool) func(*http.Request) (interface{}, []error, *api.ApiError) { + ps := storepb.PartialResponseStrategy_ABORT + if enablePartialResponse { + ps = storepb.PartialResponseStrategy_WARN + } + + return func(r *http.Request) (interface{}, []error, *api.ApiError) { + start, err := cortexutil.ParseTime(r.FormValue("start")) + if err != nil { + return nil, nil, &api.ApiError{Typ: api.ErrorBadData, Err: err} + } + end, err := cortexutil.ParseTime(r.FormValue("end")) + if err != nil { + return nil, nil, &api.ApiError{Typ: api.ErrorBadData, Err: err} + } + + req := &exemplarspb.ExemplarsRequest{ + Start: start, + End: end, + Query: r.FormValue("query"), + PartialResponseStrategy: ps, + } + exemplarsData, warnings, err := client.Exemplars(r.Context(), req) + if err != nil { + return nil, nil, &api.ApiError{Typ: api.ErrorInternal, Err: errors.Wrap(err, "retrieving exemplars")} + } + return exemplarsData, warnings, nil + } +} + var ( infMinTime = time.Unix(math.MinInt64/1000+62135596801, 0) infMaxTime = time.Unix(math.MaxInt64/1000-62135596801, 999999999) diff --git a/pkg/api/query/v1_test.go b/pkg/api/query/v1_test.go index f93f6b2b17c..4485121ce3b 100644 --- a/pkg/api/query/v1_test.go +++ b/pkg/api/query/v1_test.go @@ -162,7 +162,7 @@ func TestQueryEndpoints(t *testing.T) { 
app := db.Appender(context.Background()) for _, lbl := range lbls { for i := int64(0); i < 10; i++ { - _, err := app.Add(lbl, i*60000, float64(i)) + _, err := app.Append(0, lbl, i*60000, float64(i)) testutil.Ok(t, err) } } @@ -672,7 +672,7 @@ func TestMetadataEndpoints(t *testing.T) { ) for _, lbl := range recent { for i := int64(0); i < 10; i++ { - _, err := app.Add(lbl, start+(i*60_000), float64(i)) // ms + _, err := app.Append(0, lbl, start+(i*60_000), float64(i)) // ms testutil.Ok(t, err) } } diff --git a/pkg/exemplars/exemplars.go b/pkg/exemplars/exemplars.go new file mode 100644 index 00000000000..9c36ca46254 --- /dev/null +++ b/pkg/exemplars/exemplars.go @@ -0,0 +1,160 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package exemplars + +import ( + "context" + "sort" + + "github.com/pkg/errors" + "github.com/prometheus/prometheus/storage" + "github.com/thanos-io/thanos/pkg/exemplars/exemplarspb" + "github.com/thanos-io/thanos/pkg/store/labelpb" +) + +var _ UnaryClient = &GRPCClient{} + +// UnaryClient is gRPC exemplarspb.Exemplars client which expands streaming exemplars API. Useful for consumers that does not +// support streaming. +type UnaryClient interface { + Exemplars(ctx context.Context, req *exemplarspb.ExemplarsRequest) ([]*exemplarspb.ExemplarData, storage.Warnings, error) +} + +// GRPCClient allows to retrieve exemplars from local gRPC streaming server implementation. +// TODO(bwplotka): Switch to native gRPC transparent client->server adapter once available. +type GRPCClient struct { + proxy exemplarspb.ExemplarsServer + + replicaLabels map[string]struct{} +} + +type exemplarsServer struct { + // This field just exist to pseudo-implement the unused methods of the interface. 
+ exemplarspb.Exemplars_ExemplarsServer + ctx context.Context + + warnings []error + data []*exemplarspb.ExemplarData +} + +func (srv *exemplarsServer) Send(res *exemplarspb.ExemplarsResponse) error { + if res.GetWarning() != "" { + srv.warnings = append(srv.warnings, errors.New(res.GetWarning())) + return nil + } + + if res.GetData() == nil { + return errors.New("empty exemplars data") + } + + srv.data = append(srv.data, res.GetData()) + return nil +} + +func (srv *exemplarsServer) Context() context.Context { + return srv.ctx +} + +func NewGRPCClient(es exemplarspb.ExemplarsServer) *GRPCClient { + return NewGRPCClientWithDedup(es, nil) +} + +func NewGRPCClientWithDedup(es exemplarspb.ExemplarsServer, replicaLabels []string) *GRPCClient { + c := &GRPCClient{ + proxy: es, + replicaLabels: map[string]struct{}{}, + } + + for _, label := range replicaLabels { + c.replicaLabels[label] = struct{}{} + } + return c +} + +func (rr *GRPCClient) Exemplars(ctx context.Context, req *exemplarspb.ExemplarsRequest) ([]*exemplarspb.ExemplarData, storage.Warnings, error) { + resp := &exemplarsServer{ctx: ctx} + + if err := rr.proxy.Exemplars(req, resp); err != nil { + return nil, nil, errors.Wrap(err, "proxy Exemplars") + } + + resp.data = dedupExemplarsData(resp.data, rr.replicaLabels) + for _, d := range resp.data { + d.Exemplars = dedupExemplars(d.Exemplars, rr.replicaLabels) + } + + return resp.data, resp.warnings, nil +} + +func dedupExemplarsData(exemplarsData []*exemplarspb.ExemplarData, replicaLabels map[string]struct{}) []*exemplarspb.ExemplarData { + if len(exemplarsData) == 0 { + return exemplarsData + } + + // Sort each exemplar's label names such that they are comparable. + for _, d := range exemplarsData { + sort.Slice(d.SeriesLabels.Labels, func(i, j int) bool { + return d.SeriesLabels.Labels[i].Name < d.SeriesLabels.Labels[j].Name + }) + } + + // Sort exemplars data such that they appear next to each other. 
+ sort.Slice(exemplarsData, func(i, j int) bool { + return exemplarsData[i].Compare(exemplarsData[j]) < 0 + }) + + i := 0 + exemplarsData[i].SeriesLabels.Labels = removeReplicaLabels(exemplarsData[i].SeriesLabels.Labels, replicaLabels) + for j := 1; j < len(exemplarsData); j++ { + exemplarsData[j].SeriesLabels.Labels = removeReplicaLabels(exemplarsData[j].SeriesLabels.Labels, replicaLabels) + if exemplarsData[i].Compare(exemplarsData[j]) != 0 { + // Effectively retain exemplarsData[j] in the resulting slice. + i++ + exemplarsData[i] = exemplarsData[j] + continue + } + } + + return exemplarsData[:i+1] +} + +func dedupExemplars(exemplars []*exemplarspb.Exemplar, replicaLabels map[string]struct{}) []*exemplarspb.Exemplar { + if len(exemplars) == 0 { + return exemplars + } + + for _, e := range exemplars { + sort.Slice(e.Labels.Labels, func(i, j int) bool { + return e.Labels.Labels[i].Name < e.Labels.Labels[j].Name + }) + } + + sort.Slice(exemplars, func(i, j int) bool { + return exemplars[i].Compare(exemplars[j]) < 0 + }) + + i := 0 + exemplars[i].Labels.Labels = removeReplicaLabels(exemplars[i].Labels.Labels, replicaLabels) + for j := 1; j < len(exemplars); j++ { + exemplars[j].Labels.Labels = removeReplicaLabels(exemplars[j].Labels.Labels, replicaLabels) + if exemplars[i].Compare(exemplars[j]) != 0 { + // Effectively retain exemplars[j] in the resulting slice. + i++ + exemplars[i] = exemplars[j] + } + } + + return exemplars[:i+1] +} + +func removeReplicaLabels(labels []labelpb.ZLabel, replicaLabels map[string]struct{}) []labelpb.ZLabel { + newLabels := make([]labelpb.ZLabel, 0, len(labels)) + for _, l := range labels { + if _, ok := replicaLabels[l.Name]; !ok { + newLabels = append(newLabels, l) + } + } + + return newLabels +} diff --git a/pkg/exemplars/exemplars_test.go b/pkg/exemplars/exemplars_test.go new file mode 100644 index 00000000000..1597a021ee7 --- /dev/null +++ b/pkg/exemplars/exemplars_test.go @@ -0,0 +1,262 @@ +// Copyright (c) The Thanos Authors. 
+// Licensed under the Apache License 2.0. + +package exemplars + +import ( + "testing" + + "github.com/thanos-io/thanos/pkg/exemplars/exemplarspb" + "github.com/thanos-io/thanos/pkg/store/labelpb" + "github.com/thanos-io/thanos/pkg/testutil" +) + +func TestMain(m *testing.M) { + testutil.TolerantVerifyLeakMain(m) +} + +func TestDedupExemplarsData(t *testing.T) { + for _, tc := range []struct { + name string + exemplars, want []*exemplarspb.ExemplarData + replicaLabels []string + }{ + { + name: "nil slice", + exemplars: nil, + want: nil, + }, + { + name: "empty exemplars data slice", + exemplars: []*exemplarspb.ExemplarData{}, + want: []*exemplarspb.ExemplarData{}, + }, + { + name: "empty exemplars data", + exemplars: []*exemplarspb.ExemplarData{ + { + SeriesLabels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "__name__", Value: "test_exemplar_metric_total"}, + {Name: "instance", Value: "localhost:8090"}, + {Name: "job", Value: "prometheus"}, + {Name: "service", Value: "bar"}, + }}, + }, + }, + want: []*exemplarspb.ExemplarData{ + { + SeriesLabels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "__name__", Value: "test_exemplar_metric_total"}, + {Name: "instance", Value: "localhost:8090"}, + {Name: "job", Value: "prometheus"}, + {Name: "service", Value: "bar"}, + }}, + }, + }, + }, + { + name: "multiple series", + replicaLabels: []string{"replica"}, + exemplars: []*exemplarspb.ExemplarData{ + { + SeriesLabels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "__name__", Value: "test_exemplar_metric_total"}, + {Name: "instance", Value: "localhost:8090"}, + {Name: "job", Value: "prometheus"}, + {Name: "service", Value: "bar"}, + {Name: "replica", Value: "0"}, + }}, + Exemplars: []*exemplarspb.Exemplar{ + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: 
"traceID", Value: "EpTxMJ40fUus7aGY"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + }, + }, + { + SeriesLabels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "__name__", Value: "test_exemplar_metric_total"}, + {Name: "instance", Value: "localhost:8090"}, + {Name: "job", Value: "prometheus"}, + {Name: "service", Value: "bar"}, + {Name: "replica", Value: "1"}, + }}, + Exemplars: []*exemplarspb.Exemplar{ + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + }, + }, + }, + want: []*exemplarspb.ExemplarData{ + { + SeriesLabels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "__name__", Value: "test_exemplar_metric_total"}, + {Name: "instance", Value: "localhost:8090"}, + {Name: "job", Value: "prometheus"}, + {Name: "service", Value: "bar"}, + }}, + Exemplars: []*exemplarspb.Exemplar{ + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + }, + }, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + replicaLabels := make(map[string]struct{}) + for _, lbl := range tc.replicaLabels { + replicaLabels[lbl] = struct{}{} + } + testutil.Equals(t, tc.want, dedupExemplarsData(tc.exemplars, replicaLabels)) + }) + } +} + +func TestDedupExemplars(t *testing.T) { + for _, tc := range []struct { + name string + exemplars, want []*exemplarspb.Exemplar + replicaLabels []string + }{ + { + name: "nil slice", + exemplars: nil, + want: nil, + }, + { + name: "empty exemplars slice", + exemplars: []*exemplarspb.Exemplar{}, + want: []*exemplarspb.Exemplar{}, + }, + { + name: "duplicate exemplars", + exemplars: []*exemplarspb.Exemplar{ + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: 
"EpTxMJ40fUus7aGY"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + }, + want: []*exemplarspb.Exemplar{ + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + }, + }, + { + name: "distinct exemplars", + exemplars: []*exemplarspb.Exemplar{ + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + }}, + Value: 20, + Ts: 1600096955479, + }, + }, + want: []*exemplarspb.Exemplar{ + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + }}, + Value: 20, + Ts: 1600096955479, + }, + }, + }, + { + name: "exemplars with replica labels", + replicaLabels: []string{"replica"}, + exemplars: []*exemplarspb.Exemplar{ + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + {Name: "replica", Value: "0"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + {Name: "replica", Value: "1"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + }, + want: []*exemplarspb.Exemplar{ + { + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "traceID", Value: "EpTxMJ40fUus7aGY"}, + }}, + Value: 19, + Ts: 1600096955479, + }, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + replicaLabels := make(map[string]struct{}) + for _, lbl := range tc.replicaLabels { + replicaLabels[lbl] = struct{}{} + } + 
testutil.Equals(t, tc.want, dedupExemplars(tc.exemplars, replicaLabels)) + }) + } +} diff --git a/pkg/exemplars/exemplarspb/custom.go b/pkg/exemplars/exemplarspb/custom.go new file mode 100644 index 00000000000..72ed6db0479 --- /dev/null +++ b/pkg/exemplars/exemplarspb/custom.go @@ -0,0 +1,99 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package exemplarspb + +import ( + "encoding/json" + "math/big" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/pkg/labels" + "github.com/thanos-io/thanos/pkg/store/labelpb" +) + +// UnmarshalJSON implements json.Unmarshaler. +func (m *Exemplar) UnmarshalJSON(b []byte) error { + v := struct { + Labels labelpb.ZLabelSet + TimeStamp model.Time + Value model.SampleValue + }{} + if err := json.Unmarshal(b, &v); err != nil { + return err + } + + m.Labels = v.Labels + m.Ts = int64(v.TimeStamp) + m.Value = float64(v.Value) + + return nil +} + +// MarshalJSON implements json.Marshaler. 
+func (m *Exemplar) MarshalJSON() ([]byte, error) { + v := struct { + Labels labels.Labels `json:"labels"` + TimeStamp model.Time `json:"timestamp"` + Value model.SampleValue `json:"value"` + }{ + Labels: labelpb.ZLabelsToPromLabels(m.Labels.Labels), + TimeStamp: model.Time(m.Ts), + Value: model.SampleValue(m.Value), + } + return json.Marshal(v) +} + +func NewExemplarsResponse(e *ExemplarData) *ExemplarsResponse { + return &ExemplarsResponse{ + Result: &ExemplarsResponse_Data{ + Data: e, + }, + } +} + +func NewWarningExemplarsResponse(warning error) *ExemplarsResponse { + return &ExemplarsResponse{ + Result: &ExemplarsResponse_Warning{ + Warning: warning.Error(), + }, + } +} + +func (s1 *ExemplarData) Compare(s2 *ExemplarData) int { + return labels.Compare(s1.SeriesLabels.PromLabels(), s2.SeriesLabels.PromLabels()) +} + +func (s *ExemplarData) SetSeriesLabels(ls labels.Labels) { + var result labelpb.ZLabelSet + + if len(ls) > 0 { + result = labelpb.ZLabelSet{Labels: labelpb.ZLabelsFromPromLabels(ls)} + } + + s.SeriesLabels = result +} + +func (e *Exemplar) SetLabels(ls labels.Labels) { + var result labelpb.ZLabelSet + + if len(ls) > 0 { + result = labelpb.ZLabelSet{Labels: labelpb.ZLabelsFromPromLabels(ls)} + } + + e.Labels = result +} + +func (e1 *Exemplar) Compare(e2 *Exemplar) int { + if d := labels.Compare(e1.Labels.PromLabels(), e2.Labels.PromLabels()); d != 0 { + return d + } + if e1.Ts < e2.Ts { + return 1 + } + if e1.Ts > e2.Ts { + return -1 + } + + return big.NewFloat(e1.Value).Cmp(big.NewFloat(e2.Value)) +} diff --git a/pkg/exemplars/exemplarspb/rpc.pb.go b/pkg/exemplars/exemplarspb/rpc.pb.go new file mode 100644 index 00000000000..886a140aea9 --- /dev/null +++ b/pkg/exemplars/exemplarspb/rpc.pb.go @@ -0,0 +1,1269 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. 
+// source: exemplars/exemplarspb/rpc.proto + +package exemplarspb + +import ( + context "context" + encoding_binary "encoding/binary" + fmt "fmt" + io "io" + math "math" + math_bits "math/bits" + + _ "github.com/gogo/protobuf/gogoproto" + proto "github.com/gogo/protobuf/proto" + labelpb "github.com/thanos-io/thanos/pkg/store/labelpb" + storepb "github.com/thanos-io/thanos/pkg/store/storepb" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package + +type ExemplarsRequest struct { + Query string `protobuf:"bytes,1,opt,name=query,proto3" json:"query,omitempty"` + Start int64 `protobuf:"varint,2,opt,name=start,proto3" json:"start,omitempty"` + End int64 `protobuf:"varint,3,opt,name=end,proto3" json:"end,omitempty"` + PartialResponseStrategy storepb.PartialResponseStrategy `protobuf:"varint,4,opt,name=partial_response_strategy,json=partialResponseStrategy,proto3,enum=thanos.PartialResponseStrategy" json:"partial_response_strategy,omitempty"` +} + +func (m *ExemplarsRequest) Reset() { *m = ExemplarsRequest{} } +func (m *ExemplarsRequest) String() string { return proto.CompactTextString(m) } +func (*ExemplarsRequest) ProtoMessage() {} +func (*ExemplarsRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_fd9ad2a40bac3cc9, []int{0} +} +func (m *ExemplarsRequest) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *ExemplarsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if 
deterministic { + return xxx_messageInfo_ExemplarsRequest.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *ExemplarsRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_ExemplarsRequest.Merge(m, src) +} +func (m *ExemplarsRequest) XXX_Size() int { + return m.Size() +} +func (m *ExemplarsRequest) XXX_DiscardUnknown() { + xxx_messageInfo_ExemplarsRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_ExemplarsRequest proto.InternalMessageInfo + +type ExemplarsResponse struct { + // Types that are valid to be assigned to Result: + // *ExemplarsResponse_Data + // *ExemplarsResponse_Warning + Result isExemplarsResponse_Result `protobuf_oneof:"result"` +} + +func (m *ExemplarsResponse) Reset() { *m = ExemplarsResponse{} } +func (m *ExemplarsResponse) String() string { return proto.CompactTextString(m) } +func (*ExemplarsResponse) ProtoMessage() {} +func (*ExemplarsResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_fd9ad2a40bac3cc9, []int{1} +} +func (m *ExemplarsResponse) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *ExemplarsResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_ExemplarsResponse.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *ExemplarsResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_ExemplarsResponse.Merge(m, src) +} +func (m *ExemplarsResponse) XXX_Size() int { + return m.Size() +} +func (m *ExemplarsResponse) XXX_DiscardUnknown() { + xxx_messageInfo_ExemplarsResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_ExemplarsResponse proto.InternalMessageInfo + +type isExemplarsResponse_Result interface { + isExemplarsResponse_Result() + MarshalTo([]byte) (int, error) + Size() int +} + +type 
ExemplarsResponse_Data struct { + Data *ExemplarData `protobuf:"bytes,1,opt,name=data,proto3,oneof" json:"data,omitempty"` +} +type ExemplarsResponse_Warning struct { + Warning string `protobuf:"bytes,2,opt,name=warning,proto3,oneof" json:"warning,omitempty"` +} + +func (*ExemplarsResponse_Data) isExemplarsResponse_Result() {} +func (*ExemplarsResponse_Warning) isExemplarsResponse_Result() {} + +func (m *ExemplarsResponse) GetResult() isExemplarsResponse_Result { + if m != nil { + return m.Result + } + return nil +} + +func (m *ExemplarsResponse) GetData() *ExemplarData { + if x, ok := m.GetResult().(*ExemplarsResponse_Data); ok { + return x.Data + } + return nil +} + +func (m *ExemplarsResponse) GetWarning() string { + if x, ok := m.GetResult().(*ExemplarsResponse_Warning); ok { + return x.Warning + } + return "" +} + +// XXX_OneofWrappers is for the internal use of the proto package. +func (*ExemplarsResponse) XXX_OneofWrappers() []interface{} { + return []interface{}{ + (*ExemplarsResponse_Data)(nil), + (*ExemplarsResponse_Warning)(nil), + } +} + +type ExemplarData struct { + SeriesLabels labelpb.ZLabelSet `protobuf:"bytes,1,opt,name=seriesLabels,proto3" json:"seriesLabels"` + Exemplars []*Exemplar `protobuf:"bytes,2,rep,name=exemplars,proto3" json:"exemplars"` +} + +func (m *ExemplarData) Reset() { *m = ExemplarData{} } +func (m *ExemplarData) String() string { return proto.CompactTextString(m) } +func (*ExemplarData) ProtoMessage() {} +func (*ExemplarData) Descriptor() ([]byte, []int) { + return fileDescriptor_fd9ad2a40bac3cc9, []int{2} +} +func (m *ExemplarData) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *ExemplarData) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_ExemplarData.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *ExemplarData) XXX_Merge(src 
proto.Message) { + xxx_messageInfo_ExemplarData.Merge(m, src) +} +func (m *ExemplarData) XXX_Size() int { + return m.Size() +} +func (m *ExemplarData) XXX_DiscardUnknown() { + xxx_messageInfo_ExemplarData.DiscardUnknown(m) +} + +var xxx_messageInfo_ExemplarData proto.InternalMessageInfo + +type Exemplar struct { + Labels labelpb.ZLabelSet `protobuf:"bytes,1,opt,name=labels,proto3" json:"labels"` + Value float64 `protobuf:"fixed64,2,opt,name=value,proto3" json:"value"` + Ts int64 `protobuf:"varint,3,opt,name=ts,proto3" json:"timestamp"` +} + +func (m *Exemplar) Reset() { *m = Exemplar{} } +func (m *Exemplar) String() string { return proto.CompactTextString(m) } +func (*Exemplar) ProtoMessage() {} +func (*Exemplar) Descriptor() ([]byte, []int) { + return fileDescriptor_fd9ad2a40bac3cc9, []int{3} +} +func (m *Exemplar) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *Exemplar) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_Exemplar.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *Exemplar) XXX_Merge(src proto.Message) { + xxx_messageInfo_Exemplar.Merge(m, src) +} +func (m *Exemplar) XXX_Size() int { + return m.Size() +} +func (m *Exemplar) XXX_DiscardUnknown() { + xxx_messageInfo_Exemplar.DiscardUnknown(m) +} + +var xxx_messageInfo_Exemplar proto.InternalMessageInfo + +func init() { + proto.RegisterType((*ExemplarsRequest)(nil), "thanos.ExemplarsRequest") + proto.RegisterType((*ExemplarsResponse)(nil), "thanos.ExemplarsResponse") + proto.RegisterType((*ExemplarData)(nil), "thanos.ExemplarData") + proto.RegisterType((*Exemplar)(nil), "thanos.Exemplar") +} + +func init() { proto.RegisterFile("exemplars/exemplarspb/rpc.proto", fileDescriptor_fd9ad2a40bac3cc9) } + +var fileDescriptor_fd9ad2a40bac3cc9 = []byte{ + // 465 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 
0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x84, 0x93, 0xc1, 0x6e, 0xd3, 0x40, + 0x10, 0x86, 0xbd, 0x49, 0x1b, 0xea, 0x49, 0xa9, 0xd2, 0x55, 0x24, 0x9c, 0x48, 0xd8, 0x51, 0x4e, + 0x81, 0x43, 0x8c, 0xc2, 0x89, 0x03, 0x17, 0x0b, 0xa4, 0x4a, 0x20, 0x81, 0xb6, 0xb7, 0x72, 0xa8, + 0x36, 0x74, 0x14, 0x22, 0x39, 0xf6, 0x76, 0x77, 0x02, 0xe4, 0x01, 0xb8, 0x73, 0xe6, 0x1d, 0x78, + 0x8f, 0x1c, 0x7b, 0xe4, 0x14, 0x41, 0x72, 0xcb, 0x53, 0xa0, 0xec, 0xda, 0x69, 0x1a, 0x55, 0xea, + 0xc5, 0x3b, 0xf3, 0xcf, 0x67, 0xef, 0x3f, 0x3b, 0x6b, 0x88, 0xf0, 0x3b, 0x4e, 0x54, 0x2a, 0xb5, + 0x89, 0xb7, 0x91, 0x1a, 0xc6, 0x5a, 0x7d, 0xee, 0x2b, 0x9d, 0x53, 0xce, 0x6b, 0xf4, 0x45, 0x66, + 0xb9, 0x69, 0xb7, 0x0c, 0xe5, 0x1a, 0x63, 0xfb, 0x54, 0xc3, 0x98, 0x66, 0x0a, 0x8d, 0x43, 0xca, + 0x52, 0x2a, 0x87, 0x98, 0xee, 0x95, 0x9a, 0xa3, 0x7c, 0x94, 0xdb, 0x30, 0xde, 0x44, 0x4e, 0xed, + 0xfe, 0x66, 0xd0, 0x78, 0x5b, 0xee, 0x26, 0xf0, 0x7a, 0x8a, 0x86, 0x78, 0x13, 0x0e, 0xaf, 0xa7, + 0xa8, 0x67, 0x01, 0xeb, 0xb0, 0x9e, 0x2f, 0x5c, 0xb2, 0x51, 0x0d, 0x49, 0x4d, 0x41, 0xa5, 0xc3, + 0x7a, 0x55, 0xe1, 0x12, 0xde, 0x80, 0x2a, 0x66, 0x57, 0x41, 0xd5, 0x6a, 0x9b, 0x90, 0x7f, 0x82, + 0x96, 0x92, 0x9a, 0xc6, 0x32, 0xbd, 0xd4, 0x68, 0x54, 0x9e, 0x19, 0xbc, 0x34, 0xa4, 0x25, 0xe1, + 0x68, 0x16, 0x1c, 0x74, 0x58, 0xef, 0x64, 0x10, 0xf5, 0x5d, 0x2b, 0xfd, 0x8f, 0x0e, 0x14, 0x05, + 0x77, 0x5e, 0x60, 0xe2, 0x89, 0xba, 0xbf, 0xd0, 0x45, 0x38, 0xdd, 0xb1, 0xeb, 0x8a, 0xfc, 0x39, + 0x1c, 0x5c, 0x49, 0x92, 0xd6, 0x6e, 0x7d, 0xd0, 0x2c, 0x3f, 0x5e, 0x82, 0x6f, 0x24, 0xc9, 0x33, + 0x4f, 0x58, 0x86, 0xb7, 0xe1, 0xd1, 0x37, 0xa9, 0xb3, 0x71, 0x36, 0xb2, 0x7d, 0xf8, 0x67, 0x9e, + 0x28, 0x85, 0xe4, 0x08, 0x6a, 0x1a, 0xcd, 0x34, 0xa5, 0xee, 0x2f, 0x06, 0xc7, 0xbb, 0xaf, 0xf3, + 0x77, 0x70, 0x6c, 0x50, 0x8f, 0xd1, 0xbc, 0xdf, 0x1c, 0xad, 0x29, 0xb6, 0x3a, 0x2d, 0xb7, 0xba, + 0xb0, 0xf2, 0x39, 0x52, 0xd2, 0x9c, 0x2f, 0x22, 0x6f, 0xbd, 0x88, 0xee, 0xe0, 0xe2, 0x4e, 0xc6, + 0x5f, 0x83, 0xbf, 0x9d, 0x70, 0x50, 0xe9, 0x54, 
0x7b, 0xf5, 0x41, 0x63, 0xdf, 0x74, 0xf2, 0x78, + 0xbd, 0x88, 0x6e, 0x31, 0x71, 0x1b, 0x76, 0x7f, 0x30, 0x38, 0x2a, 0x31, 0xfe, 0x0a, 0x6a, 0xe9, + 0x03, 0x96, 0x4e, 0x0a, 0x4b, 0x05, 0x28, 0x8a, 0x95, 0x47, 0x70, 0xf8, 0x55, 0xa6, 0x53, 0xb4, + 0x07, 0xc1, 0x12, 0x7f, 0xbd, 0x88, 0x9c, 0x20, 0xdc, 0xc2, 0x9f, 0x42, 0x85, 0x8c, 0x1b, 0xad, + 0xb3, 0x43, 0xe3, 0x09, 0x1a, 0x92, 0x13, 0x25, 0x2a, 0x64, 0x06, 0x1f, 0xc0, 0xdf, 0xce, 0x82, + 0x27, 0xbb, 0x49, 0xb0, 0xdf, 0x4d, 0x79, 0xb5, 0xda, 0xad, 0x7b, 0x2a, 0x6e, 0x8a, 0x2f, 0x58, + 0xf2, 0x6c, 0xfe, 0x2f, 0xf4, 0xe6, 0xcb, 0x90, 0xdd, 0x2c, 0x43, 0xf6, 0x77, 0x19, 0xb2, 0x9f, + 0xab, 0xd0, 0xbb, 0x59, 0x85, 0xde, 0x9f, 0x55, 0xe8, 0x5d, 0xd4, 0x77, 0xfe, 0x8a, 0x61, 0xcd, + 0x5e, 0xdf, 0x97, 0xff, 0x03, 0x00, 0x00, 0xff, 0xff, 0x0f, 0x15, 0xe4, 0x68, 0x35, 0x03, 0x00, + 0x00, +} + +// Reference imports to suppress errors if they are not otherwise used. +var _ context.Context +var _ grpc.ClientConn + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +const _ = grpc.SupportPackageIsVersion4 + +// ExemplarsClient is the client API for Exemplars service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream. +type ExemplarsClient interface { + /// Exemplars has info for all exemplars. + /// Returned exemplars are expected to include external labels. 
+ Exemplars(ctx context.Context, in *ExemplarsRequest, opts ...grpc.CallOption) (Exemplars_ExemplarsClient, error) +} + +type exemplarsClient struct { + cc *grpc.ClientConn +} + +func NewExemplarsClient(cc *grpc.ClientConn) ExemplarsClient { + return &exemplarsClient{cc} +} + +func (c *exemplarsClient) Exemplars(ctx context.Context, in *ExemplarsRequest, opts ...grpc.CallOption) (Exemplars_ExemplarsClient, error) { + stream, err := c.cc.NewStream(ctx, &_Exemplars_serviceDesc.Streams[0], "/thanos.Exemplars/Exemplars", opts...) + if err != nil { + return nil, err + } + x := &exemplarsExemplarsClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type Exemplars_ExemplarsClient interface { + Recv() (*ExemplarsResponse, error) + grpc.ClientStream +} + +type exemplarsExemplarsClient struct { + grpc.ClientStream +} + +func (x *exemplarsExemplarsClient) Recv() (*ExemplarsResponse, error) { + m := new(ExemplarsResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +// ExemplarsServer is the server API for Exemplars service. +type ExemplarsServer interface { + /// Exemplars has info for all exemplars. + /// Returned exemplars are expected to include external labels. + Exemplars(*ExemplarsRequest, Exemplars_ExemplarsServer) error +} + +// UnimplementedExemplarsServer can be embedded to have forward compatible implementations. 
+type UnimplementedExemplarsServer struct { +} + +func (*UnimplementedExemplarsServer) Exemplars(req *ExemplarsRequest, srv Exemplars_ExemplarsServer) error { + return status.Errorf(codes.Unimplemented, "method Exemplars not implemented") +} + +func RegisterExemplarsServer(s *grpc.Server, srv ExemplarsServer) { + s.RegisterService(&_Exemplars_serviceDesc, srv) +} + +func _Exemplars_Exemplars_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(ExemplarsRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(ExemplarsServer).Exemplars(m, &exemplarsExemplarsServer{stream}) +} + +type Exemplars_ExemplarsServer interface { + Send(*ExemplarsResponse) error + grpc.ServerStream +} + +type exemplarsExemplarsServer struct { + grpc.ServerStream +} + +func (x *exemplarsExemplarsServer) Send(m *ExemplarsResponse) error { + return x.ServerStream.SendMsg(m) +} + +var _Exemplars_serviceDesc = grpc.ServiceDesc{ + ServiceName: "thanos.Exemplars", + HandlerType: (*ExemplarsServer)(nil), + Methods: []grpc.MethodDesc{}, + Streams: []grpc.StreamDesc{ + { + StreamName: "Exemplars", + Handler: _Exemplars_Exemplars_Handler, + ServerStreams: true, + }, + }, + Metadata: "exemplars/exemplarspb/rpc.proto", +} + +func (m *ExemplarsRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ExemplarsRequest) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *ExemplarsRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.PartialResponseStrategy != 0 { + i = encodeVarintRpc(dAtA, i, uint64(m.PartialResponseStrategy)) + i-- + dAtA[i] = 0x20 + } + if m.End != 0 { + i = encodeVarintRpc(dAtA, i, uint64(m.End)) + i-- + dAtA[i] = 0x18 + } + if m.Start != 0 { + i = 
encodeVarintRpc(dAtA, i, uint64(m.Start)) + i-- + dAtA[i] = 0x10 + } + if len(m.Query) > 0 { + i -= len(m.Query) + copy(dAtA[i:], m.Query) + i = encodeVarintRpc(dAtA, i, uint64(len(m.Query))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *ExemplarsResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ExemplarsResponse) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *ExemplarsResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.Result != nil { + { + size := m.Result.Size() + i -= size + if _, err := m.Result.MarshalTo(dAtA[i:]); err != nil { + return 0, err + } + } + } + return len(dAtA) - i, nil +} + +func (m *ExemplarsResponse_Data) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *ExemplarsResponse_Data) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + if m.Data != nil { + { + size, err := m.Data.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintRpc(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} +func (m *ExemplarsResponse_Warning) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *ExemplarsResponse_Warning) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + i -= len(m.Warning) + copy(dAtA[i:], m.Warning) + i = encodeVarintRpc(dAtA, i, uint64(len(m.Warning))) + i-- + dAtA[i] = 0x12 + return len(dAtA) - i, nil +} +func (m *ExemplarData) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } 
+ return dAtA[:n], nil +} + +func (m *ExemplarData) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *ExemplarData) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if len(m.Exemplars) > 0 { + for iNdEx := len(m.Exemplars) - 1; iNdEx >= 0; iNdEx-- { + { + size, err := m.Exemplars[iNdEx].MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintRpc(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x12 + } + } + { + size, err := m.SeriesLabels.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintRpc(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0xa + return len(dAtA) - i, nil +} + +func (m *Exemplar) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Exemplar) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *Exemplar) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.Ts != 0 { + i = encodeVarintRpc(dAtA, i, uint64(m.Ts)) + i-- + dAtA[i] = 0x18 + } + if m.Value != 0 { + i -= 8 + encoding_binary.LittleEndian.PutUint64(dAtA[i:], uint64(math.Float64bits(float64(m.Value)))) + i-- + dAtA[i] = 0x11 + } + { + size, err := m.Labels.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintRpc(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0xa + return len(dAtA) - i, nil +} + +func encodeVarintRpc(dAtA []byte, offset int, v uint64) int { + offset -= sovRpc(v) + base := offset + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return base +} +func (m *ExemplarsRequest) Size() (n int) { + if m == nil { + return 0 + } + 
var l int + _ = l + l = len(m.Query) + if l > 0 { + n += 1 + l + sovRpc(uint64(l)) + } + if m.Start != 0 { + n += 1 + sovRpc(uint64(m.Start)) + } + if m.End != 0 { + n += 1 + sovRpc(uint64(m.End)) + } + if m.PartialResponseStrategy != 0 { + n += 1 + sovRpc(uint64(m.PartialResponseStrategy)) + } + return n +} + +func (m *ExemplarsResponse) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Result != nil { + n += m.Result.Size() + } + return n +} + +func (m *ExemplarsResponse_Data) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Data != nil { + l = m.Data.Size() + n += 1 + l + sovRpc(uint64(l)) + } + return n +} +func (m *ExemplarsResponse_Warning) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.Warning) + n += 1 + l + sovRpc(uint64(l)) + return n +} +func (m *ExemplarData) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = m.SeriesLabels.Size() + n += 1 + l + sovRpc(uint64(l)) + if len(m.Exemplars) > 0 { + for _, e := range m.Exemplars { + l = e.Size() + n += 1 + l + sovRpc(uint64(l)) + } + } + return n +} + +func (m *Exemplar) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = m.Labels.Size() + n += 1 + l + sovRpc(uint64(l)) + if m.Value != 0 { + n += 9 + } + if m.Ts != 0 { + n += 1 + sovRpc(uint64(m.Ts)) + } + return n +} + +func sovRpc(x uint64) (n int) { + return (math_bits.Len64(x|1) + 6) / 7 +} +func sozRpc(x uint64) (n int) { + return sovRpc(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (m *ExemplarsRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if 
wireType == 4 { + return fmt.Errorf("proto: ExemplarsRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ExemplarsRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Query", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Query = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Start", wireType) + } + m.Start = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Start |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field End", wireType) + } + m.End = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.End |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field PartialResponseStrategy", wireType) + } + m.PartialResponseStrategy = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.PartialResponseStrategy |= 
storepb.PartialResponseStrategy(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ExemplarsResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ExemplarsResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ExemplarsResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Data", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + v := &ExemplarData{} + if err := v.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + m.Result = &ExemplarsResponse_Data{v} + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Warning", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if 
shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Result = &ExemplarsResponse_Warning{string(dAtA[iNdEx:postIndex])} + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ExemplarData) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ExemplarData: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ExemplarData: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field SeriesLabels", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex < 0 { 
+ return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.SeriesLabels.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Exemplars", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Exemplars = append(m.Exemplars, &Exemplar{}) + if err := m.Exemplars[len(m.Exemplars)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Exemplar) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Exemplar: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Exemplar: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType 
= %d for field Labels", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRpc + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthRpc + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.Labels.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 1 { + return fmt.Errorf("proto: wrong wireType = %d for field Value", wireType) + } + var v uint64 + if (iNdEx + 8) > l { + return io.ErrUnexpectedEOF + } + v = uint64(encoding_binary.LittleEndian.Uint64(dAtA[iNdEx:])) + iNdEx += 8 + m.Value = float64(math.Float64frombits(v)) + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Ts", wireType) + } + m.Ts = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRpc + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Ts |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipRpc(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthRpc + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipRpc(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + depth := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRpc + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + 
case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRpc + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + case 1: + iNdEx += 8 + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRpc + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if length < 0 { + return 0, ErrInvalidLengthRpc + } + iNdEx += length + case 3: + depth++ + case 4: + if depth == 0 { + return 0, ErrUnexpectedEndOfGroupRpc + } + depth-- + case 5: + iNdEx += 4 + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + if iNdEx < 0 { + return 0, ErrInvalidLengthRpc + } + if depth == 0 { + return iNdEx, nil + } + } + return 0, io.ErrUnexpectedEOF +} + +var ( + ErrInvalidLengthRpc = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowRpc = fmt.Errorf("proto: integer overflow") + ErrUnexpectedEndOfGroupRpc = fmt.Errorf("proto: unexpected end of group") +) diff --git a/pkg/exemplars/exemplarspb/rpc.proto b/pkg/exemplars/exemplarspb/rpc.proto new file mode 100644 index 00000000000..50dc94fa09f --- /dev/null +++ b/pkg/exemplars/exemplarspb/rpc.proto @@ -0,0 +1,54 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +syntax = "proto3"; +package thanos; + +import "store/storepb/types.proto"; +import "store/labelpb/types.proto"; +import "gogoproto/gogo.proto"; + +option go_package = "exemplarspb"; + +option (gogoproto.sizer_all) = true; +option (gogoproto.marshaler_all) = true; +option (gogoproto.unmarshaler_all) = true; +option (gogoproto.goproto_getters_all) = false; + +// Do not generate XXX fields to reduce memory footprint and opening a door +// for zero-copy casts to/from prometheus data types. 
+option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; +option (gogoproto.goproto_sizecache_all) = false; + +/// Exemplars represents API that is responsible for gathering exemplars and their states. +service Exemplars { + /// Exemplars has info for all exemplars. + /// Returned exemplars are expected to include external labels. + rpc Exemplars(ExemplarsRequest) returns (stream ExemplarsResponse); +} + +message ExemplarsRequest { + string query = 1; + int64 start = 2; + int64 end = 3; + PartialResponseStrategy partial_response_strategy = 4; +} + +message ExemplarsResponse { + oneof result { + ExemplarData data = 1; + string warning = 2; + } +} + +message ExemplarData { + ZLabelSet seriesLabels = 1 [(gogoproto.jsontag) = "seriesLabels", (gogoproto.nullable) = false]; + repeated Exemplar exemplars = 2 [(gogoproto.jsontag) = "exemplars"]; +} + +message Exemplar { + ZLabelSet labels = 1 [(gogoproto.jsontag) = "labels", (gogoproto.nullable) = false]; + double value = 2 [(gogoproto.jsontag) = "value"]; + int64 ts = 3 [(gogoproto.jsontag) = "timestamp"]; +} diff --git a/pkg/exemplars/prometheus.go b/pkg/exemplars/prometheus.go new file mode 100644 index 00000000000..a76d8e6fa2d --- /dev/null +++ b/pkg/exemplars/prometheus.go @@ -0,0 +1,58 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package exemplars + +import ( + "net/url" + + "github.com/prometheus/prometheus/pkg/labels" + "github.com/thanos-io/thanos/pkg/exemplars/exemplarspb" + "github.com/thanos-io/thanos/pkg/promclient" + "github.com/thanos-io/thanos/pkg/store/labelpb" +) + +// Prometheus implements exemplarspb.Exemplars gRPC that allows to fetch exemplars from Prometheus. +type Prometheus struct { + base *url.URL + client *promclient.Client + + extLabels func() labels.Labels +} + +// NewPrometheus creates new exemplars.Prometheus. 
+func NewPrometheus(base *url.URL, client *promclient.Client, extLabels func() labels.Labels) *Prometheus { + return &Prometheus{ + base: base, + client: client, + extLabels: extLabels, + } +} + +// Exemplars returns all specified exemplars from Prometheus. +func (p *Prometheus) Exemplars(r *exemplarspb.ExemplarsRequest, s exemplarspb.Exemplars_ExemplarsServer) error { + exemplars, err := p.client.ExemplarsInGRPC(s.Context(), p.base, r.Query, r.Start, r.End) + if err != nil { + return err + } + + // Prometheus does not add external labels, so we need to add on our own. + enrichExemplarsWithExtLabels(exemplars, p.extLabels()) + + for _, e := range exemplars { + if err := s.Send(&exemplarspb.ExemplarsResponse{Result: &exemplarspb.ExemplarsResponse_Data{Data: e}}); err != nil { + return err + } + } + return nil +} + +func enrichExemplarsWithExtLabels(exemplars []*exemplarspb.ExemplarData, extLset labels.Labels) { + for _, d := range exemplars { + d.SetSeriesLabels(labelpb.ExtendSortedLabels(d.SeriesLabels.PromLabels(), extLset)) + for i, e := range d.Exemplars { + e.SetLabels(labelpb.ExtendSortedLabels(e.Labels.PromLabels(), extLset)) + d.Exemplars[i] = e + } + } +} diff --git a/pkg/exemplars/proxy.go b/pkg/exemplars/proxy.go new file mode 100644 index 00000000000..407f2adad9c --- /dev/null +++ b/pkg/exemplars/proxy.go @@ -0,0 +1,139 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package exemplars + +import ( + "context" + "io" + + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/pkg/errors" + "github.com/thanos-io/thanos/pkg/exemplars/exemplarspb" + "github.com/thanos-io/thanos/pkg/store/storepb" + "golang.org/x/sync/errgroup" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// Proxy implements exemplarspb.Exemplars gRPC that fanouts requests to +// given exemplarspb.Exemplars. 
+type Proxy struct { + logger log.Logger + exemplars func() []exemplarspb.ExemplarsClient +} + +// RegisterExemplarsServer register exemplars server. +func RegisterExemplarsServer(exemplarsSrv exemplarspb.ExemplarsServer) func(*grpc.Server) { + return func(s *grpc.Server) { + exemplarspb.RegisterExemplarsServer(s, exemplarsSrv) + } +} + +// NewProxy return new exemplars.Proxy. +func NewProxy(logger log.Logger, exemplars func() []exemplarspb.ExemplarsClient) *Proxy { + return &Proxy{ + logger: logger, + exemplars: exemplars, + } +} + +type exemplarsStream struct { + client exemplarspb.ExemplarsClient + request *exemplarspb.ExemplarsRequest + channel chan<- *exemplarspb.ExemplarData + server exemplarspb.Exemplars_ExemplarsServer +} + +func (s *Proxy) Exemplars(req *exemplarspb.ExemplarsRequest, srv exemplarspb.Exemplars_ExemplarsServer) error { + var ( + g, gctx = errgroup.WithContext(srv.Context()) + respChan = make(chan *exemplarspb.ExemplarData, 10) + exemplars []*exemplarspb.ExemplarData + ) + + for _, exemplarsClient := range s.exemplars() { + es := &exemplarsStream{ + client: exemplarsClient, + request: req, + channel: respChan, + server: srv, + } + g.Go(func() error { return es.receive(gctx) }) + } + + go func() { + _ = g.Wait() + close(respChan) + }() + + for resp := range respChan { + exemplars = append(exemplars, resp) + } + + if err := g.Wait(); err != nil { + level.Error(s.logger).Log("err", err) + return err + } + + for _, e := range exemplars { + if err := srv.Send(exemplarspb.NewExemplarsResponse(e)); err != nil { + return status.Error(codes.Unknown, errors.Wrap(err, "send exemplars response").Error()) + } + } + + return nil +} + +func (stream *exemplarsStream) receive(ctx context.Context) error { + exemplars, err := stream.client.Exemplars(ctx, stream.request) + if err != nil { + err = errors.Wrapf(err, "fetching exemplars from exemplars client %v", stream.client) + + if stream.request.PartialResponseStrategy == storepb.PartialResponseStrategy_ABORT { 
+ return err + } + + if serr := stream.server.Send(exemplarspb.NewWarningExemplarsResponse(err)); serr != nil { + return serr + } + // Not an error if response strategy is warning. + return nil + } + + for { + exemplar, err := exemplars.Recv() + if err == io.EOF { + return nil + } + + if err != nil { + err = errors.Wrapf(err, "receiving exemplars from exemplars client %v", stream.client) + + if stream.request.PartialResponseStrategy == storepb.PartialResponseStrategy_ABORT { + return err + } + + if err := stream.server.Send(exemplarspb.NewWarningExemplarsResponse(err)); err != nil { + return errors.Wrapf(err, "sending exemplars error to server %v", stream.server) + } + + continue + } + + if w := exemplar.GetWarning(); w != "" { + if err := stream.server.Send(exemplarspb.NewWarningExemplarsResponse(errors.New(w))); err != nil { + return errors.Wrapf(err, "sending exemplars warning to server %v", stream.server) + } + continue + } + + select { + case stream.channel <- exemplar.GetData(): + case <-ctx.Done(): + return ctx.Err() + } + } +} diff --git a/pkg/extgrpc/client.go b/pkg/extgrpc/client.go index 58305b32ef6..d6a6dc8b3d2 100644 --- a/pkg/extgrpc/client.go +++ b/pkg/extgrpc/client.go @@ -10,19 +10,20 @@ import ( "github.com/go-kit/kit/log/level" grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware/v2" grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus" - opentracing "github.com/opentracing/opentracing-go" + "github.com/opentracing/opentracing-go" "github.com/prometheus/client_golang/prometheus" - "github.com/thanos-io/thanos/pkg/tls" - "github.com/thanos-io/thanos/pkg/tracing" "google.golang.org/grpc" "google.golang.org/grpc/credentials" + + "github.com/thanos-io/thanos/pkg/tls" + "github.com/thanos-io/thanos/pkg/tracing" ) // StoreClientGRPCOpts creates gRPC dial options for connecting to a store client. 
-func StoreClientGRPCOpts(logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, secure bool, cert, key, caCert, serverName string) ([]grpc.DialOption, error) { +func StoreClientGRPCOpts(logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, secure, skipVerify bool, cert, key, caCert, serverName string) ([]grpc.DialOption, error) { grpcMets := grpc_prometheus.NewClientMetrics() grpcMets.EnableClientHandlingTimeHistogram( - grpc_prometheus.WithHistogramBuckets([]float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120}), + grpc_prometheus.WithHistogramBuckets([]float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120, 240, 360, 720}), ) dialOpts := []grpc.DialOption{ // We want to make sure that we can receive huge gRPC messages from storeAPI. @@ -53,7 +54,7 @@ func StoreClientGRPCOpts(logger log.Logger, reg *prometheus.Registry, tracer ope level.Info(logger).Log("msg", "enabling client to server TLS") - tlsCfg, err := tls.NewClientConfig(logger, cert, key, caCert, serverName) + tlsCfg, err := tls.NewClientConfig(logger, cert, key, caCert, serverName, skipVerify) if err != nil { return nil, err } diff --git a/pkg/extprom/http/instrument_server.go b/pkg/extprom/http/instrument_server.go index c09bb6f8b3d..df7217adfc7 100644 --- a/pkg/extprom/http/instrument_server.go +++ b/pkg/extprom/http/instrument_server.go @@ -39,13 +39,18 @@ type defaultInstrumentationMiddleware struct { } // NewInstrumentationMiddleware provides default InstrumentationMiddleware. -func NewInstrumentationMiddleware(reg prometheus.Registerer) InstrumentationMiddleware { +// Passing nil as buckets uses the default buckets. 
+func NewInstrumentationMiddleware(reg prometheus.Registerer, buckets []float64) InstrumentationMiddleware { + if buckets == nil { + buckets = []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120, 240, 360, 720} + } + ins := defaultInstrumentationMiddleware{ requestDuration: promauto.With(reg).NewHistogramVec( prometheus.HistogramOpts{ Name: "http_request_duration_seconds", Help: "Tracks the latencies for HTTP requests.", - Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120}, + Buckets: buckets, }, []string{"code", "handler", "method"}, ), diff --git a/pkg/promclient/promclient.go b/pkg/promclient/promclient.go index 58fb296a039..3ac3bb5147c 100644 --- a/pkg/promclient/promclient.go +++ b/pkg/promclient/promclient.go @@ -31,6 +31,7 @@ import ( "github.com/prometheus/prometheus/pkg/timestamp" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/thanos-io/thanos/pkg/exemplars/exemplarspb" "github.com/thanos-io/thanos/pkg/metadata/metadatapb" "github.com/thanos-io/thanos/pkg/rules/rulespb" "github.com/thanos-io/thanos/pkg/runutil" @@ -758,3 +759,26 @@ func (c *Client) MetadataInGRPC(ctx context.Context, base *url.URL, metric strin } return v.Data, c.get2xxResultWithGRPCErrors(ctx, "/metadata HTTP[client]", &u, &v) } + +// ExemplarsInGRPC returns the exemplars from Prometheus exemplars API. It uses gRPC errors. +// NOTE: This method is tested in pkg/store/prometheus_test.go against Prometheus. 
+func (c *Client) ExemplarsInGRPC(ctx context.Context, base *url.URL, query string, startTime, endTime int64) ([]*exemplarspb.ExemplarData, error) { + u := *base + u.Path = path.Join(u.Path, "/api/v1/query_exemplars") + q := u.Query() + + q.Add("query", query) + q.Add("start", formatTime(timestamp.Time(startTime))) + q.Add("end", formatTime(timestamp.Time(endTime))) + u.RawQuery = q.Encode() + + var m struct { + Data []*exemplarspb.ExemplarData `json:"data"` + } + + if err := c.get2xxResultWithGRPCErrors(ctx, "/prom_exemplars HTTP[client]", &u, &m); err != nil { + return nil, err + } + + return m.Data, nil +} diff --git a/pkg/query/storeset.go b/pkg/query/storeset.go index b014f683e3a..1f5c07586d8 100644 --- a/pkg/query/storeset.go +++ b/pkg/query/storeset.go @@ -16,6 +16,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/pkg/labels" + "github.com/thanos-io/thanos/pkg/exemplars/exemplarspb" "google.golang.org/grpc" "github.com/thanos-io/thanos/pkg/component" @@ -55,6 +56,11 @@ type MetadataSpec interface { Addr() string } +type ExemplarSpec interface { + // Addr returns ExemplarsAPI Address for the exemplars spec. It is used as its ID. + Addr() string +} + // stringError forces the error to be a string // when marshaled into a JSON. 
type stringError struct { @@ -188,6 +194,7 @@ type StoreSet struct { storeSpecs func() []StoreSpec ruleSpecs func() []RuleSpec metadataSpecs func() []MetadataSpec + exemplarSpecs func() []ExemplarSpec dialOpts []grpc.DialOption gRPCInfoCallTimeout time.Duration @@ -211,6 +218,7 @@ func NewStoreSet( storeSpecs func() []StoreSpec, ruleSpecs func() []RuleSpec, metadataSpecs func() []MetadataSpec, + exemplarSpecs func() []ExemplarSpec, dialOpts []grpc.DialOption, unhealthyStoreTimeout time.Duration, ) *StoreSet { @@ -231,12 +239,16 @@ func NewStoreSet( if metadataSpecs == nil { metadataSpecs = func() []MetadataSpec { return nil } } + if exemplarSpecs == nil { + exemplarSpecs = func() []ExemplarSpec { return nil } + } ss := &StoreSet{ logger: log.With(logger, "component", "storeset"), storeSpecs: storeSpecs, ruleSpecs: ruleSpecs, metadataSpecs: metadataSpecs, + exemplarSpecs: exemplarSpecs, dialOpts: dialOpts, storesMetric: storesMetric, gRPCInfoCallTimeout: 5 * time.Second, @@ -258,6 +270,9 @@ type storeRef struct { rule rulespb.RulesClient metadata metadatapb.MetadataClient + // If exemplar is not nil, then this store also support exemplars API. + exemplar exemplarspb.ExemplarsClient + // Meta (can change during runtime). 
labelSets []labels.Labels storeType component.StoreAPI @@ -267,7 +282,7 @@ type storeRef struct { logger log.Logger } -func (s *storeRef) Update(labelSets []labels.Labels, minTime int64, maxTime int64, storeType component.StoreAPI, rule rulespb.RulesClient, metadata metadatapb.MetadataClient) { +func (s *storeRef) Update(labelSets []labels.Labels, minTime int64, maxTime int64, storeType component.StoreAPI, rule rulespb.RulesClient, metadata metadatapb.MetadataClient, exemplar exemplarspb.ExemplarsClient) { s.mtx.Lock() defer s.mtx.Unlock() @@ -277,6 +292,7 @@ func (s *storeRef) Update(labelSets []labels.Labels, minTime int64, maxTime int6 s.maxTime = maxTime s.rule = rule s.metadata = metadata + s.exemplar = exemplar } func (s *storeRef) StoreType() component.StoreAPI { @@ -300,6 +316,13 @@ func (s *storeRef) HasMetadataAPI() bool { return s.metadata != nil } +func (s *storeRef) HasExemplarsAPI() bool { + s.mtx.RLock() + defer s.mtx.RUnlock() + + return s.exemplar != nil +} + func (s *storeRef) LabelSets() []labels.Labels { s.mtx.RLock() defer s.mtx.RUnlock() @@ -405,6 +428,10 @@ func (s *StoreSet) Update(ctx context.Context) { level.Info(s.logger).Log("msg", "adding new rulesAPI to query storeset", "address", addr) } + if st.HasExemplarsAPI() { + level.Info(s.logger).Log("msg", "adding new exemplarsAPI to query storeset", "address", addr) + } + level.Info(s.logger).Log("msg", "adding new storeAPI to query storeset", "address", addr, "extLset", extLset) } @@ -426,6 +453,7 @@ func (s *StoreSet) getActiveStores(ctx context.Context, stores map[string]*store storeAddrSet = make(map[string]struct{}) ruleAddrSet = make(map[string]struct{}) metadataAddrSet = make(map[string]struct{}) + exemplarAddrSet = make(map[string]struct{}) ) // Gather active stores map concurrently. Build new store if does not exist already. 
@@ -438,6 +466,11 @@ func (s *StoreSet) getActiveStores(ctx context.Context, stores map[string]*store metadataAddrSet[metadataSpec.Addr()] = struct{}{} } + // Gather active stores map concurrently. Build new store if does not exist already. + for _, exemplarSpec := range s.exemplarSpecs() { + exemplarAddrSet[exemplarSpec.Addr()] = struct{}{} + } + // Gather healthy stores map concurrently. Build new store if does not exist already. for _, storeSpec := range s.storeSpecs() { if _, ok := storeAddrSet[storeSpec.Addr()]; ok { @@ -478,6 +511,11 @@ func (s *StoreSet) getActiveStores(ctx context.Context, stores map[string]*store metadata = metadatapb.NewMetadataClient(st.cc) } + var exemplar exemplarspb.ExemplarsClient + if _, ok := exemplarAddrSet[addr]; ok { + exemplar = exemplarspb.NewExemplarsClient(st.cc) + } + // Check existing or new store. Is it healthy? What are current metadata? labelSets, minTime, maxTime, storeType, err := spec.Metadata(ctx, st.StoreClient) if err != nil { @@ -502,7 +540,7 @@ func (s *StoreSet) getActiveStores(ctx context.Context, stores map[string]*store } s.updateStoreStatus(st, nil) - st.Update(labelSets, minTime, maxTime, storeType, rule, metadata) + st.Update(labelSets, minTime, maxTime, storeType, rule, metadata, exemplar) mtx.Lock() defer mtx.Unlock() @@ -600,6 +638,20 @@ func (s *StoreSet) GetMetadataClients() []metadatapb.MetadataClient { return metadataClients } +// GetExemplarsClients returns a list of all active exemplars clients. 
+func (s *StoreSet) GetExemplarsClients() []exemplarspb.ExemplarsClient { + s.storesMtx.RLock() + defer s.storesMtx.RUnlock() + + exemplars := make([]exemplarspb.ExemplarsClient, 0, len(s.stores)) + for _, st := range s.stores { + if st.HasExemplarsAPI() { + exemplars = append(exemplars, st.exemplar) + } + } + return exemplars +} + func (s *StoreSet) Close() { s.storesMtx.Lock() defer s.storesMtx.Unlock() diff --git a/pkg/query/storeset_test.go b/pkg/query/storeset_test.go index 7de48faafbf..fd03f45c945 100644 --- a/pkg/query/storeset_test.go +++ b/pkg/query/storeset_test.go @@ -199,6 +199,9 @@ func TestStoreSet_Update(t *testing.T) { func() (specs []MetadataSpec) { return nil }, + func() (specs []ExemplarSpec) { + return nil + }, testGRPCOpts, time.Minute) storeSet.gRPCInfoCallTimeout = 2 * time.Second defer storeSet.Close() @@ -549,6 +552,7 @@ func TestStoreSet_Update_NoneAvailable(t *testing.T) { }, func() (specs []RuleSpec) { return nil }, func() (specs []MetadataSpec) { return nil }, + func() (specs []ExemplarSpec) { return nil }, testGRPCOpts, time.Minute) storeSet.gRPCInfoCallTimeout = 2 * time.Second @@ -635,6 +639,8 @@ func TestQuerierStrict(t *testing.T) { return nil }, func() (specs []MetadataSpec) { return nil + }, func() []ExemplarSpec { + return nil }, testGRPCOpts, time.Minute) defer storeSet.Close() storeSet.gRPCInfoCallTimeout = 1 * time.Second @@ -688,6 +694,7 @@ func TestStoreSet_Update_Rules(t *testing.T) { name string storeSpecs func() []StoreSpec ruleSpecs func() []RuleSpec + exemplarSpecs func() []ExemplarSpec expectedStores int expectedRules int }{ @@ -757,6 +764,12 @@ func TestStoreSet_Update_Rules(t *testing.T) { NewGRPCStoreSpec(stores.orderAddrs[1], false), } }, + exemplarSpecs: func() []ExemplarSpec { + return []ExemplarSpec{ + NewGRPCStoreSpec(stores.orderAddrs[0], false), + NewGRPCStoreSpec(stores.orderAddrs[1], false), + } + }, expectedStores: 2, expectedRules: 2, }, @@ -765,6 +778,7 @@ func TestStoreSet_Update_Rules(t *testing.T) { 
tc.storeSpecs, tc.ruleSpecs, func() []MetadataSpec { return nil }, + tc.exemplarSpecs, testGRPCOpts, time.Minute) t.Run(tc.name, func(t *testing.T) { @@ -940,6 +954,7 @@ func TestStoreSet_Rules_Discovery(t *testing.T) { func() []MetadataSpec { return nil }, + func() []ExemplarSpec { return nil }, testGRPCOpts, time.Minute) defer storeSet.Close() diff --git a/pkg/query/test.go b/pkg/query/test.go index 08843a15365..5d2e2faba47 100644 --- a/pkg/query/test.go +++ b/pkg/query/test.go @@ -434,7 +434,7 @@ func (cmd *loadCmd) Append(a storage.Appender) error { m := cmd.metrics[h] for _, s := range smpls { - if _, err := a.Add(m, s.T, s.V); err != nil { + if _, err := a.Append(0, m, s.T, s.V); err != nil { return err } } diff --git a/pkg/queryfrontend/roundtrip_test.go b/pkg/queryfrontend/roundtrip_test.go index 9eaaad8f863..c00b33fcec9 100644 --- a/pkg/queryfrontend/roundtrip_test.go +++ b/pkg/queryfrontend/roundtrip_test.go @@ -14,7 +14,7 @@ import ( "time" cortexcache "github.com/cortexproject/cortex/pkg/chunk/cache" - "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/cortexproject/cortex/pkg/querier/queryrange" cortexvalidation "github.com/cortexproject/cortex/pkg/util/validation" "github.com/go-kit/kit/log" @@ -694,8 +694,8 @@ func promqlResults(fail bool) (*int, http.Handler) { ResultType: string(parser.ValueTypeMatrix), Result: []queryrange.SampleStream{ { - Labels: []client.LabelAdapter{}, - Samples: []client.Sample{ + Labels: []cortexpb.LabelAdapter{}, + Samples: []cortexpb.Sample{ {Value: 0, TimestampMs: 0}, {Value: 1, TimestampMs: 1}, }, diff --git a/pkg/receive/handler.go b/pkg/receive/handler.go index 0337fe495ff..80a168f30cb 100644 --- a/pkg/receive/handler.go +++ b/pkg/receive/handler.go @@ -150,7 +150,9 @@ func NewHandler(logger log.Logger, o *Options) *Handler { ins := extpromhttp.NewNopInstrumentationMiddleware() if o.Registry != nil { - ins = 
extpromhttp.NewInstrumentationMiddleware(o.Registry) + ins = extpromhttp.NewInstrumentationMiddleware(o.Registry, + []float64{0.001, 0.005, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.25, 0.5, 0.75, 1, 2, 3, 4, 5}, + ) } readyf := h.testReady diff --git a/pkg/receive/handler_test.go b/pkg/receive/handler_test.go index 70637d49fd5..4bcb314d2e9 100644 --- a/pkg/receive/handler_test.go +++ b/pkg/receive/handler_test.go @@ -262,7 +262,7 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -273,7 +273,7 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, }, }, @@ -284,7 +284,7 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(conflictErrFn, nil, nil, nil), + appender: newFakeAppender(conflictErrFn, nil, nil), }, }, }, @@ -295,10 +295,10 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -309,13 +309,13 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -326,13 +326,13 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, nil, nil), + appender: 
newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -343,13 +343,13 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, }, }, @@ -360,13 +360,13 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, }, }, @@ -377,15 +377,15 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), appenderErr: appenderErrFn, }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), appenderErr: appenderErrFn, }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), appenderErr: appenderErrFn, }, }, @@ -397,13 +397,13 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(conflictErrFn, nil, nil, nil), + appender: newFakeAppender(conflictErrFn, nil, nil), }, { - appender: newFakeAppender(conflictErrFn, nil, nil, nil), + appender: newFakeAppender(conflictErrFn, nil, nil), }, { - appender: 
newFakeAppender(conflictErrFn, nil, nil, nil), + appender: newFakeAppender(conflictErrFn, nil, nil), }, }, }, @@ -414,13 +414,13 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(conflictErrFn, nil, commitErrFn, nil), + appender: newFakeAppender(conflictErrFn, commitErrFn, nil), }, { - appender: newFakeAppender(conflictErrFn, nil, commitErrFn, nil), + appender: newFakeAppender(conflictErrFn, commitErrFn, nil), }, { - appender: newFakeAppender(conflictErrFn, nil, commitErrFn, nil), + appender: newFakeAppender(conflictErrFn, commitErrFn, nil), }, }, }, @@ -431,13 +431,13 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(cycleErrors([]error{storage.ErrOutOfBounds, storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp}), nil, nil, nil), + appender: newFakeAppender(cycleErrors([]error{storage.ErrOutOfBounds, storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp}), nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -448,13 +448,13 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -465,13 +465,13 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(cycleErrors([]error{storage.ErrOutOfBounds, storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp}), nil, nil, nil), + appender: 
newFakeAppender(cycleErrors([]error{storage.ErrOutOfBounds, storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp}), nil, nil), }, { - appender: newFakeAppender(conflictErrFn, nil, nil, nil), + appender: newFakeAppender(conflictErrFn, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -482,13 +482,13 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(cycleErrors([]error{storage.ErrOutOfBounds, storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp}), nil, nil, nil), + appender: newFakeAppender(cycleErrors([]error{storage.ErrOutOfBounds, storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp}), nil, nil), }, { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -499,13 +499,13 @@ func TestReceiveQuorum(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -598,7 +598,7 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -609,7 +609,7 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, }, }, @@ -620,7 +620,7 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, 
appendables: []*fakeAppendable{ { - appender: newFakeAppender(conflictErrFn, nil, nil, nil), + appender: newFakeAppender(conflictErrFn, nil, nil), }, }, }, @@ -631,10 +631,10 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -645,13 +645,13 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -662,13 +662,13 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -679,13 +679,13 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, }, }, @@ -696,13 +696,13 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, 
commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, }, }, @@ -713,15 +713,15 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), appenderErr: appenderErrFn, }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), appenderErr: appenderErrFn, }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), appenderErr: appenderErrFn, }, }, @@ -733,13 +733,13 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(conflictErrFn, nil, nil, nil), + appender: newFakeAppender(conflictErrFn, nil, nil), }, { - appender: newFakeAppender(conflictErrFn, nil, nil, nil), + appender: newFakeAppender(conflictErrFn, nil, nil), }, { - appender: newFakeAppender(conflictErrFn, nil, nil, nil), + appender: newFakeAppender(conflictErrFn, nil, nil), }, }, }, @@ -750,13 +750,13 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(conflictErrFn, nil, commitErrFn, nil), + appender: newFakeAppender(conflictErrFn, commitErrFn, nil), }, { - appender: newFakeAppender(conflictErrFn, nil, commitErrFn, nil), + appender: newFakeAppender(conflictErrFn, commitErrFn, nil), }, { - appender: newFakeAppender(conflictErrFn, nil, commitErrFn, nil), + appender: newFakeAppender(conflictErrFn, commitErrFn, nil), }, }, }, @@ -767,13 +767,13 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(cycleErrors([]error{storage.ErrOutOfBounds, storage.ErrOutOfOrderSample, 
storage.ErrDuplicateSampleForTimestamp}), nil, nil, nil), + appender: newFakeAppender(cycleErrors([]error{storage.ErrOutOfBounds, storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp}), nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -784,13 +784,13 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -801,13 +801,13 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(cycleErrors([]error{storage.ErrOutOfBounds, storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp}), nil, nil, nil), + appender: newFakeAppender(cycleErrors([]error{storage.ErrOutOfBounds, storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp}), nil, nil), }, { - appender: newFakeAppender(conflictErrFn, nil, nil, nil), + appender: newFakeAppender(conflictErrFn, nil, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -818,13 +818,13 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(cycleErrors([]error{storage.ErrOutOfBounds, storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp}), nil, nil, nil), + appender: newFakeAppender(cycleErrors([]error{storage.ErrOutOfBounds, storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp}), nil, nil), }, { - appender: newFakeAppender(nil, nil, 
commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, @@ -835,13 +835,13 @@ func TestReceiveWithConsistencyDelay(t *testing.T) { wreq: wreq1, appendables: []*fakeAppendable{ { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, commitErrFn, nil), + appender: newFakeAppender(nil, commitErrFn, nil), }, { - appender: newFakeAppender(nil, nil, nil, nil), + appender: newFakeAppender(nil, nil, nil), }, }, }, diff --git a/pkg/receive/multitsdb_test.go b/pkg/receive/multitsdb_test.go index 8e6eeeaaf06..76847ac28da 100644 --- a/pkg/receive/multitsdb_test.go +++ b/pkg/receive/multitsdb_test.go @@ -34,9 +34,9 @@ func TestMultiTSDB(t *testing.T) { t.Run("run fresh", func(t *testing.T) { m := NewMultiTSDB( dir, logger, prometheus.NewRegistry(), &tsdb.Options{ - MinBlockDuration: int64(2 * time.Hour / time.Millisecond), - MaxBlockDuration: int64(2 * time.Hour / time.Millisecond), - RetentionDuration: int64(6 * time.Hour / time.Millisecond), + MinBlockDuration: (2 * time.Hour).Milliseconds(), + MaxBlockDuration: (2 * time.Hour).Milliseconds(), + RetentionDuration: (6 * time.Hour).Milliseconds(), NoLockfile: true, }, labels.FromStrings("replica", "01"), @@ -62,11 +62,11 @@ func TestMultiTSDB(t *testing.T) { return err })) - _, err = a.Add(labels.FromStrings("a", "1", "b", "2"), 1, 2.41241) + _, err = a.Append(0, labels.FromStrings("a", "1", "b", "2"), 1, 2.41241) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "1", "b", "2"), 2, 3.41241) + _, err = a.Append(0, labels.FromStrings("a", "1", "b", "2"), 2, 3.41241) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "1", "b", "2"), 3, 4.41241) + _, err = a.Append(0, labels.FromStrings("a", "1", "b", "2"), 3, 4.41241) testutil.Ok(t, err) testutil.Ok(t, a.Commit()) @@ -89,11 +89,11 @@ func 
TestMultiTSDB(t *testing.T) { return err })) - _, err = a.Add(labels.FromStrings("a", "1", "b", "2"), 1, 20.41241) + _, err = a.Append(0, labels.FromStrings("a", "1", "b", "2"), 1, 20.41241) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "1", "b", "2"), 2, 30.41241) + _, err = a.Append(0, labels.FromStrings("a", "1", "b", "2"), 2, 30.41241) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "1", "b", "2"), 3, 40.41241) + _, err = a.Append(0, labels.FromStrings("a", "1", "b", "2"), 3, 40.41241) testutil.Ok(t, err) testutil.Ok(t, a.Commit()) @@ -102,9 +102,9 @@ func TestMultiTSDB(t *testing.T) { t.Run("run on existing storage", func(t *testing.T) { m := NewMultiTSDB( dir, logger, prometheus.NewRegistry(), &tsdb.Options{ - MinBlockDuration: int64(2 * time.Hour / time.Millisecond), - MaxBlockDuration: int64(2 * time.Hour / time.Millisecond), - RetentionDuration: int64(6 * time.Hour / time.Millisecond), + MinBlockDuration: (2 * time.Hour).Milliseconds(), + MaxBlockDuration: (2 * time.Hour).Milliseconds(), + RetentionDuration: (6 * time.Hour).Milliseconds(), NoLockfile: true, }, labels.FromStrings("replica", "01"), @@ -258,3 +258,46 @@ func (s *storeSeriesServer) Send(r *storepb.SeriesResponse) error { func (s *storeSeriesServer) Context() context.Context { return s.ctx } + +func BenchmarkMultiTSDB(b *testing.B) { + dir, err := ioutil.TempDir("", "multitsdb") + testutil.Ok(b, err) + defer func() { testutil.Ok(b, os.RemoveAll(dir)) }() + + m := NewMultiTSDB(dir, log.NewNopLogger(), prometheus.NewRegistry(), &tsdb.Options{ + MinBlockDuration: (2 * time.Hour).Milliseconds(), + MaxBlockDuration: (2 * time.Hour).Milliseconds(), + RetentionDuration: (6 * time.Hour).Milliseconds(), + NoLockfile: true, + }, labels.FromStrings("replica", "test"), + "tenant_id", + nil, + false, + metadata.NoneFunc, + ) + defer func() { testutil.Ok(b, m.Close()) }() + + testutil.Ok(b, m.Flush()) + testutil.Ok(b, m.Open()) + + app, err := m.TenantAppendable("foo") + 
testutil.Ok(b, err) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + var a storage.Appender + testutil.Ok(b, runutil.Retry(1*time.Second, ctx.Done(), func() error { + a, err = app.Appender(context.Background()) + return err + })) + + l := labels.FromStrings("a", "1", "b", "2") + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + _, _ = a.Append(0, l, int64(i), float64(i)) + } +} diff --git a/pkg/receive/writer.go b/pkg/receive/writer.go index 8fe1cfe68c0..ed76a1610e1 100644 --- a/pkg/receive/writer.go +++ b/pkg/receive/writer.go @@ -71,7 +71,7 @@ func (r *Writer) Write(ctx context.Context, tenantID string, wreq *prompb.WriteR // Append as many valid samples as possible, but keep track of the errors. for _, s := range t.Samples { - _, err = app.Add(lset, s.Timestamp, s.Value) + _, err = app.Append(0, lset, s.Timestamp, s.Value) switch err { case nil: continue @@ -142,21 +142,16 @@ func (f *fakeAppendable) Appender(_ context.Context) (storage.Appender, error) { type fakeAppender struct { sync.Mutex samples map[uint64][]prompb.Sample - addErr func() error - addFastErr func() error + appendErr func() error commitErr func() error rollbackErr func() error } var _ storage.Appender = &fakeAppender{} -// TODO(kakkoyun): Linter - `addFastErr` always receives `nil`. 
-func newFakeAppender(addErr, addFastErr, commitErr, rollbackErr func() error) *fakeAppender { //nolint:unparam - if addErr == nil { - addErr = nilErrFn - } - if addFastErr == nil { - addFastErr = nilErrFn +func newFakeAppender(appendErr, commitErr, rollbackErr func() error) *fakeAppender { //nolint:unparam + if appendErr == nil { + appendErr = nilErrFn } if commitErr == nil { commitErr = nilErrFn @@ -166,8 +161,7 @@ func newFakeAppender(addErr, addFastErr, commitErr, rollbackErr func() error) *f } return &fakeAppender{ samples: make(map[uint64][]prompb.Sample), - addErr: addErr, - addFastErr: addFastErr, + appendErr: appendErr, commitErr: commitErr, rollbackErr: rollbackErr, } @@ -182,19 +176,14 @@ func (f *fakeAppender) Get(l labels.Labels) []prompb.Sample { return res } -func (f *fakeAppender) Add(l labels.Labels, t int64, v float64) (uint64, error) { - f.Lock() - defer f.Unlock() - ref := l.Hash() - f.samples[ref] = append(f.samples[ref], prompb.Sample{Value: v, Timestamp: t}) - return ref, f.addErr() -} - -func (f *fakeAppender) AddFast(ref uint64, t int64, v float64) error { +func (f *fakeAppender) Append(ref uint64, l labels.Labels, t int64, v float64) (uint64, error) { f.Lock() defer f.Unlock() - f.samples[ref] = append(f.samples[ref], prompb.Sample{Value: v, Timestamp: t}) - return f.addFastErr() + if ref == 0 { + ref = l.Hash() + } + f.samples[ref] = append(f.samples[ref], prompb.Sample{Timestamp: t, Value: v}) + return ref, f.appendErr() } func (f *fakeAppender) Commit() error { diff --git a/pkg/rules/manager_test.go b/pkg/rules/manager_test.go index 0d35fbd0bed..5f04764367b 100644 --- a/pkg/rules/manager_test.go +++ b/pkg/rules/manager_test.go @@ -34,11 +34,10 @@ func (n nopAppendable) Appender(_ context.Context) storage.Appender { return nop type nopAppender struct{} -func (n nopAppender) Add(l labels.Labels, t int64, v float64) (uint64, error) { return 0, nil } -func (n nopAppender) AddFast(ref uint64, t int64, v float64) error { return nil } -func (n 
nopAppender) Commit() error { return nil } -func (n nopAppender) Rollback() error { return nil } -func (n nopAppender) Appender(_ context.Context) (storage.Appender, error) { return n, nil } +func (n nopAppender) Append(uint64, labels.Labels, int64, float64) (uint64, error) { return 0, nil } +func (n nopAppender) Commit() error { return nil } +func (n nopAppender) Rollback() error { return nil } +func (n nopAppender) Appender(_ context.Context) (storage.Appender, error) { return n, nil } type nopQueryable struct{} diff --git a/pkg/rules/rules_test.go b/pkg/rules/rules_test.go index ada7bf8ebf3..0f0c99499a3 100644 --- a/pkg/rules/rules_test.go +++ b/pkg/rules/rules_test.go @@ -36,7 +36,7 @@ func testRulesAgainstExamples(t *testing.T, dir string, server rulespb.RulesServ { Name: "thanos-bucket-replicate", File: filepath.Join(dir, "alerts.yaml"), - Rules: []*rulespb.Rule{someAlert, someAlert, someAlert}, + Rules: []*rulespb.Rule{someAlert, someAlert}, Interval: 60, PartialResponseStrategy: storepb.PartialResponseStrategy_ABORT, }, diff --git a/pkg/server/grpc/grpc.go b/pkg/server/grpc/grpc.go index 0d029b20f7f..3f8155ff8ee 100644 --- a/pkg/server/grpc/grpc.go +++ b/pkg/server/grpc/grpc.go @@ -11,7 +11,7 @@ import ( "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" - kit "github.com/grpc-ecosystem/go-grpc-middleware/providers/kit/v2" + "github.com/grpc-ecosystem/go-grpc-middleware/providers/kit/v2" grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware/v2" grpc_logging "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging" grpc_recovery "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/recovery" diff --git a/pkg/store/bucket_test.go b/pkg/store/bucket_test.go index f165a6785de..caa244f2a1f 100644 --- a/pkg/store/bucket_test.go +++ b/pkg/store/bucket_test.go @@ -1061,7 +1061,7 @@ func uploadTestBlock(t testing.TB, tmpDir string, bkt objstore.Bucket, series in func 
appendTestData(t testing.TB, app storage.Appender, series int) { addSeries := func(l labels.Labels) { - _, err := app.Add(l, 0, 0) + _, err := app.Append(0, l, 0, 0) testutil.Ok(t, err) } @@ -1415,7 +1415,7 @@ func TestBucketSeries_OneBlock_InMemIndexCacheSegfault(t *testing.T) { ts := int64(i) lbls := labels.FromStrings("foo", "bar", "b", "1", "i", fmt.Sprintf("%07d%s", ts, storetestutil.LabelLongSuffix)) - _, err := app.Add(lbls, ts, 0) + _, err := app.Append(0, lbls, ts, 0) testutil.Ok(t, err) } testutil.Ok(t, app.Commit()) @@ -1454,7 +1454,7 @@ func TestBucketSeries_OneBlock_InMemIndexCacheSegfault(t *testing.T) { ts := int64(i) lbls := labels.FromStrings("foo", "bar", "b", "2", "i", fmt.Sprintf("%07d%s", ts, storetestutil.LabelLongSuffix)) - _, err := app.Add(lbls, ts, 0) + _, err := app.Append(0, lbls, ts, 0) testutil.Ok(t, err) } testutil.Ok(t, app.Commit()) @@ -1703,7 +1703,7 @@ func TestSeries_BlockWithMultipleChunks(t *testing.T) { // Appending a single sample is very unoptimised, but guarantees each chunk is always MaxSamplesPerChunk // (except the last one, which could be smaller). 
app := h.Appender(context.Background()) - _, err := app.Add(series, ts, float64(ts)) + _, err := app.Append(0, series, ts, float64(ts)) testutil.Ok(t, err) testutil.Ok(t, app.Commit()) } @@ -1980,10 +1980,11 @@ func createBlockWithOneSeriesWithStep(t testutil.TB, dir string, lbls labels.Lab app := h.Appender(context.Background()) ts := int64(blockIndex * totalSamples) - ref, err := app.Add(lbls, ts, random.Float64()) + ref, err := app.Append(0, lbls, ts, random.Float64()) testutil.Ok(t, err) for i := 1; i < totalSamples; i++ { - testutil.Ok(t, app.AddFast(ref, ts+step*int64(i), random.Float64())) + _, err := app.Append(ref, nil, ts+step*int64(i), random.Float64()) + testutil.Ok(t, err) } testutil.Ok(t, app.Commit()) diff --git a/pkg/store/prometheus_test.go b/pkg/store/prometheus_test.go index 4cda3ddafab..e637b9afaa8 100644 --- a/pkg/store/prometheus_test.go +++ b/pkg/store/prometheus_test.go @@ -45,11 +45,11 @@ func testPrometheusStoreSeriesE2e(t *testing.T, prefix string) { baseT := timestamp.FromTime(time.Now()) / 1000 * 1000 a := p.Appender() - _, err = a.Add(labels.FromStrings("a", "b"), baseT+100, 1) + _, err = a.Append(0, labels.FromStrings("a", "b"), baseT+100, 1) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "b"), baseT+200, 2) + _, err = a.Append(0, labels.FromStrings("a", "b"), baseT+200, 2) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "b"), baseT+300, 3) + _, err = a.Append(0, labels.FromStrings("a", "b"), baseT+300, 3) testutil.Ok(t, err) testutil.Ok(t, a.Commit()) @@ -179,13 +179,13 @@ func TestPrometheusStore_SeriesLabels_e2e(t *testing.T) { baseT := timestamp.FromTime(time.Now()) / 1000 * 1000 a := p.Appender() - _, err = a.Add(labels.FromStrings("a", "b", "b", "d"), baseT+100, 1) + _, err = a.Append(0, labels.FromStrings("a", "b", "b", "d"), baseT+100, 1) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "c", "b", "d", "job", "test"), baseT+200, 2) + _, err = a.Append(0, labels.FromStrings("a", "c", "b", 
"d", "job", "test"), baseT+200, 2) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "d", "b", "d", "job", "test"), baseT+300, 3) + _, err = a.Append(0, labels.FromStrings("a", "d", "b", "d", "job", "test"), baseT+300, 3) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("b", "d", "job", "test"), baseT+400, 4) + _, err = a.Append(0, labels.FromStrings("b", "d", "job", "test"), baseT+400, 4) testutil.Ok(t, err) testutil.Ok(t, a.Commit()) @@ -359,11 +359,11 @@ func TestPrometheusStore_LabelNames_e2e(t *testing.T) { defer func() { testutil.Ok(t, p.Stop()) }() a := p.Appender() - _, err = a.Add(labels.FromStrings("a", "b"), 0, 1) + _, err = a.Append(0, labels.FromStrings("a", "b"), 0, 1) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "c"), 0, 1) + _, err = a.Append(0, labels.FromStrings("a", "c"), 0, 1) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "a"), 0, 1) + _, err = a.Append(0, labels.FromStrings("a", "a"), 0, 1) testutil.Ok(t, err) testutil.Ok(t, a.Commit()) @@ -404,11 +404,11 @@ func TestPrometheusStore_LabelValues_e2e(t *testing.T) { defer func() { testutil.Ok(t, p.Stop()) }() a := p.Appender() - _, err = a.Add(labels.FromStrings("a", "b"), 0, 1) + _, err = a.Append(0, labels.FromStrings("a", "b"), 0, 1) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "c"), 0, 1) + _, err = a.Append(0, labels.FromStrings("a", "c"), 0, 1) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "a"), 0, 1) + _, err = a.Append(0, labels.FromStrings("a", "a"), 0, 1) testutil.Ok(t, err) testutil.Ok(t, a.Commit()) @@ -452,9 +452,9 @@ func TestPrometheusStore_ExternalLabelValues_e2e(t *testing.T) { defer func() { testutil.Ok(t, p.Stop()) }() a := p.Appender() - _, err = a.Add(labels.FromStrings("ext_a", "b"), 0, 1) + _, err = a.Append(0, labels.FromStrings("ext_a", "b"), 0, 1) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "b"), 0, 1) + _, err = a.Append(0, labels.FromStrings("a", "b"), 0, 1) testutil.Ok(t, 
err) testutil.Ok(t, a.Commit()) @@ -494,11 +494,11 @@ func TestPrometheusStore_Series_MatchExternalLabel_e2e(t *testing.T) { baseT := timestamp.FromTime(time.Now()) / 1000 * 1000 a := p.Appender() - _, err = a.Add(labels.FromStrings("a", "b"), baseT+100, 1) + _, err = a.Append(0, labels.FromStrings("a", "b"), baseT+100, 1) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "b"), baseT+200, 2) + _, err = a.Append(0, labels.FromStrings("a", "b"), baseT+200, 2) testutil.Ok(t, err) - _, err = a.Add(labels.FromStrings("a", "b"), baseT+300, 3) + _, err = a.Append(0, labels.FromStrings("a", "b"), baseT+300, 3) testutil.Ok(t, err) testutil.Ok(t, a.Commit()) @@ -574,7 +574,7 @@ func testSeries_SplitSamplesIntoChunksWithMaxSizeOf120(t *testing.T, appender st offset := int64(2*math.MaxUint16 + 5) for i := int64(0); i < offset; i++ { - _, err := appender.Add(labels.FromStrings("a", "b"), baseT+i, 1) + _, err := appender.Append(0, labels.FromStrings("a", "b"), baseT+i, 1) testutil.Ok(t, err) } diff --git a/pkg/store/storepb/testutil/series.go b/pkg/store/storepb/testutil/series.go index cb9305cfaaa..6ed8dd73a6b 100644 --- a/pkg/store/storepb/testutil/series.go +++ b/pkg/store/storepb/testutil/series.go @@ -88,7 +88,8 @@ func CreateHeadWithSeries(t testing.TB, j int, opts HeadGenOptions) (*tsdb.Head, app := h.Appender(context.Background()) for i := 0; i < opts.Series; i++ { tsLabel := j*opts.Series*opts.SamplesPerSeries + i*opts.SamplesPerSeries - ref, err := app.Add( + ref, err := app.Append( + 0, labels.FromStrings("foo", "bar", "i", fmt.Sprintf("%07d%s", tsLabel, LabelLongSuffix)), int64(tsLabel)*opts.ScrapeInterval.Milliseconds(), opts.Random.Float64(), @@ -96,7 +97,8 @@ func CreateHeadWithSeries(t testing.TB, j int, opts HeadGenOptions) (*tsdb.Head, testutil.Ok(t, err) for is := 1; is < opts.SamplesPerSeries; is++ { - testutil.Ok(t, app.AddFast(ref, int64(tsLabel+is)*opts.ScrapeInterval.Milliseconds(), opts.Random.Float64())) + _, err := app.Append(ref, nil, 
int64(tsLabel+is)*opts.ScrapeInterval.Milliseconds(), opts.Random.Float64()) + testutil.Ok(t, err) } } testutil.Ok(t, app.Commit()) diff --git a/pkg/store/tsdb_test.go b/pkg/store/tsdb_test.go index 46ae106465b..605b5dad5ed 100644 --- a/pkg/store/tsdb_test.go +++ b/pkg/store/tsdb_test.go @@ -48,7 +48,7 @@ func TestTSDBStore_Info(t *testing.T) { testutil.Equals(t, int64(math.MaxInt64), resp.MaxTime) app := db.Appender(context.Background()) - _, err = app.Add(labels.FromStrings("a", "a"), 12, 0.1) + _, err = app.Append(0, labels.FromStrings("a", "a"), 12, 0.1) testutil.Ok(t, err) testutil.Ok(t, app.Commit()) @@ -76,7 +76,7 @@ func TestTSDBStore_Series(t *testing.T) { appender := db.Appender(context.Background()) for i := 1; i <= 3; i++ { - _, err = appender.Add(labels.FromStrings("a", "1"), int64(i), float64(i)) + _, err = appender.Append(0, labels.FromStrings("a", "1"), int64(i), float64(i)) testutil.Ok(t, err) } err = appender.Commit() @@ -200,7 +200,7 @@ func TestTSDBStore_LabelNames(t *testing.T) { appender := db.Appender(context.Background()) addLabels := func(lbs []string, timestamp int64) { if len(lbs) > 0 { - _, err = appender.Add(labels.FromStrings(lbs...), timestamp, 1) + _, err = appender.Append(0, labels.FromStrings(lbs...), timestamp, 1) testutil.Ok(t, err) } } @@ -306,7 +306,7 @@ func TestTSDBStore_LabelValues(t *testing.T) { appender := db.Appender(context.Background()) addLabels := func(lbs []string, timestamp int64) { if len(lbs) > 0 { - _, err = appender.Add(labels.FromStrings(lbs...), timestamp, 1) + _, err = appender.Append(0, labels.FromStrings(lbs...), timestamp, 1) testutil.Ok(t, err) } } diff --git a/pkg/testutil/e2eutil/prometheus.go b/pkg/testutil/e2eutil/prometheus.go index 42ca211aaa9..edd0c1d07dc 100644 --- a/pkg/testutil/e2eutil/prometheus.go +++ b/pkg/testutil/e2eutil/prometheus.go @@ -445,7 +445,7 @@ func createBlock( app := h.Appender(ctx) for _, lset := range batch { - _, err := app.Add(lset, t, rand.Float64()) + _, err := 
app.Append(0, lset, t, rand.Float64()) if err != nil { if rerr := app.Rollback(); rerr != nil { err = errors.Wrapf(err, "rollback failed: %v", rerr) diff --git a/pkg/tls/options.go b/pkg/tls/options.go index b7e102cccf6..ec73d4269e7 100644 --- a/pkg/tls/options.go +++ b/pkg/tls/options.go @@ -61,7 +61,7 @@ func NewServerConfig(logger log.Logger, cert, key, clientCA string) (*tls.Config } // NewClientConfig provides new client TLS configuration. -func NewClientConfig(logger log.Logger, cert, key, caCert, serverName string) (*tls.Config, error) { +func NewClientConfig(logger log.Logger, cert, key, caCert, serverName string, skipVerify bool) (*tls.Config, error) { var certPool *x509.CertPool if caCert != "" { caPEM, err := ioutil.ReadFile(caCert) @@ -91,6 +91,10 @@ func NewClientConfig(logger log.Logger, cert, key, caCert, serverName string) (* tlsCfg.ServerName = serverName } + if skipVerify { + tlsCfg.InsecureSkipVerify = true + } + if (key != "") != (cert != "") { return nil, errors.New("both client key and certificate must be provided") } diff --git a/scripts/genproto.sh b/scripts/genproto.sh index 83f1f7da904..29482d8fc38 100755 --- a/scripts/genproto.sh +++ b/scripts/genproto.sh @@ -25,7 +25,7 @@ PATH=${PATH}:/tmp/protobin GOGOPROTO_ROOT="$(GO111MODULE=on go list -modfile=.bingo/protoc-gen-gogofast.mod -f '{{ .Dir }}' -m github.com/gogo/protobuf)" GOGOPROTO_PATH="${GOGOPROTO_ROOT}:${GOGOPROTO_ROOT}/protobuf" -DIRS="store/storepb/ store/storepb/prompb/ store/labelpb rules/rulespb store/hintspb queryfrontend metadata/metadatapb" +DIRS="store/storepb/ store/storepb/prompb/ store/labelpb rules/rulespb store/hintspb queryfrontend metadata/metadatapb exemplars/exemplarspb" echo "generating code" pushd "pkg" for dir in ${DIRS}; do diff --git a/test/e2e/compact_test.go b/test/e2e/compact_test.go index a1e206f08b6..1934af23935 100644 --- a/test/e2e/compact_test.go +++ b/test/e2e/compact_test.go @@ -442,7 +442,7 @@ func TestCompactWithStoreGateway(t *testing.T) { 
testutil.Ok(t, str.WaitSumMetrics(e2e.Equals(0), "thanos_blocks_meta_sync_failures_total")) testutil.Ok(t, str.WaitSumMetrics(e2e.Equals(0), "thanos_blocks_meta_modified")) - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{str.GRPCNetworkEndpoint()}, nil, nil, nil, "", "") + q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{str.GRPCNetworkEndpoint()}, nil, nil, nil, nil, "", "") testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(q)) diff --git a/test/e2e/e2ethanos/services.go b/test/e2e/e2ethanos/services.go index 2cb490fb1a2..9e73239a935 100644 --- a/test/e2e/e2ethanos/services.go +++ b/test/e2e/e2ethanos/services.go @@ -66,16 +66,17 @@ func NewPrometheus(sharedDir string, name string, config, promImage string) (*e2 return nil, "", errors.Wrap(err, "creating prom config failed") } + args := e2e.BuildArgs(map[string]string{ + "--config.file": filepath.Join(container, "prometheus.yml"), + "--storage.tsdb.path": container, + "--storage.tsdb.max-block-duration": "2h", + "--log.level": infoLogLevel, + "--web.listen-address": ":9090", + }) prom := e2e.NewHTTPService( fmt.Sprintf("prometheus-%s", name), promImage, - e2e.NewCommandWithoutEntrypoint("prometheus", e2e.BuildArgs(map[string]string{ - "--config.file": filepath.Join(container, "prometheus.yml"), - "--storage.tsdb.path": container, - "--storage.tsdb.max-block-duration": "2h", - "--log.level": infoLogLevel, - "--web.listen-address": ":9090", - })...), + e2e.NewCommandWithoutEntrypoint("prometheus", args...), e2e.NewHTTPReadinessProbe(9090, "/-/ready", 200, 200), 9090, ) @@ -114,7 +115,7 @@ func NewPrometheusWithSidecar(sharedDir string, netName string, name string, con return prom, sidecar, nil } -func NewQuerier(sharedDir, name string, storeAddresses, fileSDStoreAddresses, ruleAddresses, metadataAddresses []string, routePrefix, externalPrefix string) (*Service, error) { +func NewQuerier(sharedDir, name string, storeAddresses, fileSDStoreAddresses, ruleAddresses, metadataAddresses, 
exemplarAddresses []string, routePrefix, externalPrefix string) (*Service, error) { const replicaLabel = "replica" args := e2e.BuildArgs(map[string]string{ @@ -140,6 +141,10 @@ func NewQuerier(sharedDir, name string, storeAddresses, fileSDStoreAddresses, ru args = append(args, "--metadata="+addr) } + for _, addr := range exemplarAddresses { + args = append(args, "--exemplar="+addr) + } + if len(fileSDStoreAddresses) > 0 { queryFileSDDir := filepath.Join(sharedDir, "data", "querier", name) container := filepath.Join(e2e.ContainerSharedDir, "data", "querier", name) diff --git a/test/e2e/exemplars_api_test.go b/test/e2e/exemplars_api_test.go new file mode 100644 index 00000000000..33437078b58 --- /dev/null +++ b/test/e2e/exemplars_api_test.go @@ -0,0 +1,64 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package e2e_test + +import ( + "context" + "testing" + "time" + + "github.com/cortexproject/cortex/integration/e2e" + "github.com/thanos-io/thanos/pkg/testutil" + "github.com/thanos-io/thanos/test/e2e/e2ethanos" +) + +func TestExemplarsAPI_Fanout(t *testing.T) { + t.Parallel() + + netName := "e2e_test_exemplars_fanout" + + s, err := e2e.NewScenario(netName) + testutil.Ok(t, err) + t.Cleanup(e2ethanos.CleanScenario(t, s)) + + // 2x Prometheus. 
+ prom1, sidecar1, err := e2ethanos.NewPrometheusWithSidecar( + s.SharedDir(), + netName, + "prom1", + defaultPromConfig("ha", 0, "", ""), + e2ethanos.DefaultPrometheusImage(), + ) + testutil.Ok(t, err) + prom2, sidecar2, err := e2ethanos.NewPrometheusWithSidecar( + s.SharedDir(), + netName, + "prom2", + defaultPromConfig("ha", 1, "", ""), + e2ethanos.DefaultPrometheusImage(), + ) + testutil.Ok(t, err) + testutil.Ok(t, s.StartAndWaitReady(prom1, sidecar1, prom2, sidecar2)) + + q, err := e2ethanos.NewQuerier( + s.SharedDir(), + "query", + []string{sidecar1.GRPCNetworkEndpoint(), sidecar2.GRPCNetworkEndpoint()}, + nil, + nil, + nil, + []string{sidecar1.GRPCNetworkEndpoint(), sidecar2.GRPCNetworkEndpoint()}, + "", + "", + ) + + testutil.Ok(t, err) + testutil.Ok(t, s.StartAndWaitReady(q)) + + _, cancel := context.WithTimeout(context.Background(), 1*time.Minute) + t.Cleanup(cancel) + + testutil.Ok(t, q.WaitSumMetricsWithOptions(e2e.Equals(2), []string{"thanos_store_nodes_grpc_connections"}, e2e.WaitMissingMetrics)) + testutil.Ok(t, q.WaitSumMetricsWithOptions(e2e.Equals(2), []string{"thanos_query_exemplar_apis_dns_provider_results"}, e2e.WaitMissingMetrics)) +} diff --git a/test/e2e/metadata_api_test.go b/test/e2e/metadata_api_test.go index 6211525f0cf..4a15b8a9d8c 100644 --- a/test/e2e/metadata_api_test.go +++ b/test/e2e/metadata_api_test.go @@ -51,6 +51,7 @@ func TestMetadataAPI_Fanout(t *testing.T) { nil, nil, []string{sidecar1.GRPCNetworkEndpoint(), sidecar2.GRPCNetworkEndpoint()}, + nil, "", "", ) diff --git a/test/e2e/query_frontend_test.go b/test/e2e/query_frontend_test.go index 27dd9b123d0..d80f7168f76 100644 --- a/test/e2e/query_frontend_test.go +++ b/test/e2e/query_frontend_test.go @@ -34,7 +34,7 @@ func TestQueryFrontend(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(prom, sidecar)) - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{sidecar.GRPCNetworkEndpoint()}, nil, nil, nil, "", "") + q, err := 
e2ethanos.NewQuerier(s.SharedDir(), "1", []string{sidecar.GRPCNetworkEndpoint()}, nil, nil, nil, nil, "", "") testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(q)) @@ -388,7 +388,7 @@ func TestQueryFrontendMemcachedCache(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(prom, sidecar)) - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{sidecar.GRPCNetworkEndpoint()}, nil, nil, nil, "", "") + q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{sidecar.GRPCNetworkEndpoint()}, nil, nil, nil, nil, "", "") testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(q)) diff --git a/test/e2e/query_test.go b/test/e2e/query_test.go index 00215e01f6c..c12ddd368a1 100644 --- a/test/e2e/query_test.go +++ b/test/e2e/query_test.go @@ -108,7 +108,7 @@ func TestQuery(t *testing.T) { testutil.Ok(t, s.StartAndWaitReady(prom1, sidecar1, prom2, sidecar2, prom3, sidecar3, prom4, sidecar4)) // Querier. Both fileSD and directly by flags. - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{sidecar1.GRPCNetworkEndpoint(), sidecar2.GRPCNetworkEndpoint(), receiver.GRPCNetworkEndpoint()}, []string{sidecar3.GRPCNetworkEndpoint(), sidecar4.GRPCNetworkEndpoint()}, nil, nil, "", "") + q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{sidecar1.GRPCNetworkEndpoint(), sidecar2.GRPCNetworkEndpoint(), receiver.GRPCNetworkEndpoint()}, []string{sidecar3.GRPCNetworkEndpoint(), sidecar4.GRPCNetworkEndpoint()}, nil, nil, nil, "", "") testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(q)) @@ -189,6 +189,7 @@ func TestQueryExternalPrefixWithoutReverseProxy(t *testing.T) { nil, nil, nil, + nil, "", externalPrefix, ) @@ -213,6 +214,7 @@ func TestQueryExternalPrefix(t *testing.T) { nil, nil, nil, + nil, "", externalPrefix, ) @@ -243,6 +245,7 @@ func TestQueryExternalPrefixAndRoutePrefix(t *testing.T) { nil, nil, nil, + nil, routePrefix, externalPrefix, ) @@ -281,6 +284,7 @@ func TestQueryLabelNames(t *testing.T) { []string{}, nil, nil, + nil, "", 
"", ) @@ -340,6 +344,7 @@ func TestQueryLabelValues(t *testing.T) { []string{}, nil, nil, + nil, "", "", ) diff --git a/test/e2e/receive_test.go b/test/e2e/receive_test.go index 9aa599f40c7..fffef5b65c6 100644 --- a/test/e2e/receive_test.go +++ b/test/e2e/receive_test.go @@ -102,7 +102,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(prom1, prom2, prom3)) - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r1.GRPCNetworkEndpoint(), r2.GRPCNetworkEndpoint(), r3.GRPCNetworkEndpoint()}, nil, nil, nil, "", "") + q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r1.GRPCNetworkEndpoint(), r2.GRPCNetworkEndpoint(), r3.GRPCNetworkEndpoint()}, nil, nil, nil, nil, "", "") testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(q)) @@ -178,7 +178,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(prom1, prom2, prom3)) - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r1.GRPCNetworkEndpoint(), r2.GRPCNetworkEndpoint(), r3.GRPCNetworkEndpoint()}, nil, nil, nil, "", "") + q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r1.GRPCNetworkEndpoint(), r2.GRPCNetworkEndpoint(), r3.GRPCNetworkEndpoint()}, nil, nil, nil, nil, "", "") testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(q)) @@ -253,7 +253,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(prom1)) - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r1.GRPCNetworkEndpoint(), r2.GRPCNetworkEndpoint(), r3.GRPCNetworkEndpoint()}, nil, nil, nil, "", "") + q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r1.GRPCNetworkEndpoint(), r2.GRPCNetworkEndpoint(), r3.GRPCNetworkEndpoint()}, nil, nil, nil, nil, "", "") testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(q)) @@ -325,7 +325,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(prom1)) - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", 
[]string{r1.GRPCNetworkEndpoint(), r2.GRPCNetworkEndpoint()}, nil, nil, nil, "", "") + q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r1.GRPCNetworkEndpoint(), r2.GRPCNetworkEndpoint()}, nil, nil, nil, nil, "", "") testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(q)) @@ -400,7 +400,7 @@ func TestReceive(t *testing.T) { testutil.Ok(t, s.StartAndWaitReady(prom1)) testutil.Ok(t, s.StartAndWaitReady(prom2)) - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r1.GRPCNetworkEndpoint()}, nil, nil, nil, "", "") + q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r1.GRPCNetworkEndpoint()}, nil, nil, nil, nil, "", "") testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(q)) ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) diff --git a/test/e2e/rule_test.go b/test/e2e/rule_test.go index 0417ca6e299..5ea489298b0 100644 --- a/test/e2e/rule_test.go +++ b/test/e2e/rule_test.go @@ -291,7 +291,7 @@ func TestRule_AlertmanagerHTTPClient(t *testing.T) { { EndpointsConfig: http_util.EndpointsConfig{ StaticAddresses: func() []string { - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", nil, nil, nil, nil, "", "") + q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", nil, nil, nil, nil, nil, "", "") testutil.Ok(t, err) return []string{q.NetworkHTTPEndpointFor(s.NetworkName())} }(), @@ -302,7 +302,7 @@ func TestRule_AlertmanagerHTTPClient(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(r)) - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r.GRPCNetworkEndpoint()}, nil, nil, nil, "", "") + q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r.GRPCNetworkEndpoint()}, nil, nil, nil, nil, "", "") testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(q)) @@ -383,7 +383,7 @@ func TestRule(t *testing.T) { testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(r)) - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r.GRPCNetworkEndpoint()}, nil, nil, nil, "", "") + q, err := 
e2ethanos.NewQuerier(s.SharedDir(), "1", []string{r.GRPCNetworkEndpoint()}, nil, nil, nil, nil, "", "") testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(q)) diff --git a/test/e2e/rules_api_test.go b/test/e2e/rules_api_test.go index 67d18c2b410..594da162323 100644 --- a/test/e2e/rules_api_test.go +++ b/test/e2e/rules_api_test.go @@ -70,6 +70,7 @@ func TestRulesAPI_Fanout(t *testing.T) { nil, []string{sidecar1.GRPCNetworkEndpoint(), sidecar2.GRPCNetworkEndpoint(), r1.GRPCNetworkEndpoint(), r2.GRPCNetworkEndpoint()}, nil, + nil, "", "", ) diff --git a/test/e2e/store_gateway_test.go b/test/e2e/store_gateway_test.go index 1cf4d61341f..d25c9ca19c9 100644 --- a/test/e2e/store_gateway_test.go +++ b/test/e2e/store_gateway_test.go @@ -62,7 +62,7 @@ func TestStoreGateway(t *testing.T) { // Ensure bucket UI. ensureGETStatusCode(t, http.StatusOK, "http://"+path.Join(s1.HTTPEndpoint(), "loaded")) - q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{s1.GRPCNetworkEndpoint()}, nil, nil, nil, "", "") + q, err := e2ethanos.NewQuerier(s.SharedDir(), "1", []string{s1.GRPCNetworkEndpoint()}, nil, nil, nil, nil, "", "") testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(q))