diff --git a/go.mod b/go.mod index 3ab560df..80d11208 100644 --- a/go.mod +++ b/go.mod @@ -4,18 +4,18 @@ go 1.21 toolchain go1.21.5 -require dagger.io/dagger v0.9.4 +require dagger.io/dagger v0.9.5 require ( github.com/99designs/gqlgen v0.17.41 // indirect github.com/Khan/genqlient v0.6.0 // indirect github.com/adrg/xdg v0.4.0 // indirect - github.com/google/uuid v1.4.0 // indirect + github.com/google/uuid v1.5.0 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/sosodev/duration v1.2.0 // indirect github.com/stretchr/testify v1.8.4 // indirect github.com/vektah/gqlparser/v2 v2.5.10 // indirect - golang.org/x/exp v0.0.0-20231206192017-f3f8817b8deb // indirect + golang.org/x/exp v0.0.0-20231219180239-dc181d75b848 // indirect golang.org/x/sync v0.5.0 // indirect golang.org/x/sys v0.15.0 // indirect ) diff --git a/go.sum b/go.sum index ba4a0437..41104906 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,5 @@ -dagger.io/dagger v0.9.4 h1:0GNTLq1RlTOcwq6u3qMg/hLFBf2t+OlPeq9OYstLE1E= -dagger.io/dagger v0.9.4/go.mod h1:ic2UD6gS5iBp2e6VWPxyb7h6VpAyhFN6U7/TDlriox8= +dagger.io/dagger v0.9.5 h1:44ypKgOjKZKrzqHSoa/wIfqnrSZM87tuoAKJpo5tGV4= +dagger.io/dagger v0.9.5/go.mod h1:ic2UD6gS5iBp2e6VWPxyb7h6VpAyhFN6U7/TDlriox8= github.com/99designs/gqlgen v0.17.41 h1:C1/zYMhGVP5TWNCNpmZ9Mb6CqT1Vr5SHEWoTOEJ3v3I= github.com/99designs/gqlgen v0.17.41/go.mod h1:GQ6SyMhwFbgHR0a8r2Wn8fYgEwPxxmndLFPhU63+cJE= github.com/Khan/genqlient v0.6.0 h1:Bwb1170ekuNIVIwTJEqvO8y7RxBxXu639VJOkKSrwAk= @@ -11,8 +11,8 @@ github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4= -github.com/google/uuid v1.4.0/go.mod 
h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= +github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -27,8 +27,8 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/vektah/gqlparser/v2 v2.5.10 h1:6zSM4azXC9u4Nxy5YmdmGu4uKamfwsdKTwp5zsEealU= github.com/vektah/gqlparser/v2 v2.5.10/go.mod h1:1rCcfwB2ekJofmluGWXMSEnPMZgbxzwj6FaZ/4OT8Cc= -golang.org/x/exp v0.0.0-20231206192017-f3f8817b8deb h1:c0vyKkb6yr3KR7jEfJaOSv4lG7xPkbN6r52aJz1d8a8= -golang.org/x/exp v0.0.0-20231206192017-f3f8817b8deb/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI= +golang.org/x/exp v0.0.0-20231219180239-dc181d75b848 h1:+iq7lrkxmFNBM7xx+Rae2W6uyPfhPeDWD+n+JgppptE= +golang.org/x/exp v0.0.0-20231219180239-dc181d75b848/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI= golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/kubernetes/common/grafana/k8s-all-in-one.yaml b/kubernetes/common/grafana/k8s-all-in-one.yaml index 94dc64cd..40d09598 100644 --- a/kubernetes/common/grafana/k8s-all-in-one.yaml +++ b/kubernetes/common/grafana/k8s-all-in-one.yaml @@ -11,7 +11,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/name: grafana app.kubernetes.io/version: 10.2.2 - helm.sh/chart: grafana-7.0.15 + helm.sh/chart: grafana-7.0.19 name: grafana namespace: 
monitoring-system --- @@ -23,7 +23,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/name: grafana app.kubernetes.io/version: 10.2.2 - helm.sh/chart: grafana-7.0.15 + helm.sh/chart: grafana-7.0.19 name: grafana namespace: monitoring-system rules: [] @@ -36,7 +36,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/name: grafana app.kubernetes.io/version: 10.2.2 - helm.sh/chart: grafana-7.0.15 + helm.sh/chart: grafana-7.0.19 name: grafana-clusterrole rules: - apiGroups: @@ -57,7 +57,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/name: grafana app.kubernetes.io/version: 10.2.2 - helm.sh/chart: grafana-7.0.15 + helm.sh/chart: grafana-7.0.19 name: grafana namespace: monitoring-system roleRef: @@ -77,7 +77,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/name: grafana app.kubernetes.io/version: 10.2.2 - helm.sh/chart: grafana-7.0.15 + helm.sh/chart: grafana-7.0.19 name: grafana-clusterrolebinding roleRef: apiGroup: rbac.authorization.k8s.io @@ -156,7 +156,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/name: grafana app.kubernetes.io/version: 10.2.2 - helm.sh/chart: grafana-7.0.15 + helm.sh/chart: grafana-7.0.19 name: grafana-config-dashboards namespace: monitoring-system --- @@ -3465,7 +3465,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/name: grafana app.kubernetes.io/version: 10.2.2 - helm.sh/chart: grafana-7.0.15 + helm.sh/chart: grafana-7.0.19 name: grafana namespace: monitoring-system spec: @@ -3487,7 +3487,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/name: grafana app.kubernetes.io/version: 10.2.2 - helm.sh/chart: grafana-7.0.15 + helm.sh/chart: grafana-7.0.19 name: grafana namespace: monitoring-system spec: @@ -3502,9 +3502,8 @@ spec: template: metadata: annotations: - checksum/config: 01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b - checksum/dashboards-json-config: 
01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b - checksum/sc-dashboard-provider-config: bdf4f72457c27d90339e495afe35c93f33240b1ab1ae0206f6f20cf91dd89563 + checksum/config: 1120aef9f995c07acee41c7a4606678ccfc19213f1be038c204f23a193a59ee2 + checksum/sc-dashboard-provider-config: a0c14ec48c732bce9563a691da03becdcb64b4ad0056b890bee49de38ae261d2 kubectl.kubernetes.io/default-container: grafana logs.agent.grafana.com/scrape: "true" logs.agent.grafana.com/scrub-level: debug @@ -3696,7 +3695,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/name: grafana app.kubernetes.io/version: 10.2.2 - helm.sh/chart: grafana-7.0.15 + helm.sh/chart: grafana-7.0.19 name: grafana namespace: monitoring-system spec: @@ -3724,7 +3723,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/name: grafana app.kubernetes.io/version: 10.2.2 - helm.sh/chart: grafana-7.0.15 + helm.sh/chart: grafana-7.0.19 name: grafana namespace: monitoring-system spec: diff --git a/kubernetes/common/grafana/kustomization.yaml b/kubernetes/common/grafana/kustomization.yaml index 66fd8148..5b38e8c8 100644 --- a/kubernetes/common/grafana/kustomization.yaml +++ b/kubernetes/common/grafana/kustomization.yaml @@ -5,7 +5,6 @@ namespace: monitoring-system helmCharts: - name: grafana - version: 7.0.15 repo: https://grafana.github.io/helm-charts releaseName: grafana namespace: monitoring-system @@ -16,10 +15,6 @@ helmCharts: resources: - namespace.yaml -images: -- name: docker.io/grafana/grafana - newTag: 10.2.2 - configMapGenerator: - name: grafana files: diff --git a/monitoring-mixins/agent-flow-mixin/jsonnetfile.lock.json b/monitoring-mixins/agent-flow-mixin/jsonnetfile.lock.json index 8f5007b3..98280076 100644 --- a/monitoring-mixins/agent-flow-mixin/jsonnetfile.lock.json +++ b/monitoring-mixins/agent-flow-mixin/jsonnetfile.lock.json @@ -8,7 +8,7 @@ "subdir": "operations/agent-flow-mixin" } }, - "version": "5c0fa9bf270beafcabfe46f9966f5b2c2dc84fff", + "version": 
"5e9df54372c6d1115abb5bb0a7e6b3fb86e6b03e", "sum": "TyMxO1sQ2p21MKAYqufOO5HVp/UtTyMBMLmBK4kM6Pw=" } ], diff --git a/monitoring-mixins/k8s-all-in-one.yaml b/monitoring-mixins/k8s-all-in-one.yaml index 7daa841f..62b0e18a 100644 --- a/monitoring-mixins/k8s-all-in-one.yaml +++ b/monitoring-mixins/k8s-all-in-one.yaml @@ -3534,7 +3534,6 @@ data: { "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "series", "legendLink": null } @@ -3609,7 +3608,6 @@ data: { "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}) / sum(loki_ingester_memory_streams{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "chunks", "legendLink": null } @@ -3696,21 +3694,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_ingester_chunk_utilization_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -3785,21 +3780,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1e3", "format": 
"time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_ingester_chunk_age_seconds_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -3886,21 +3878,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_ingester_chunk_entries_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -3975,7 +3964,6 @@ data: { "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Index Entries", "legendLink": null } @@ 
-4060,9 +4048,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}", + "expr": "loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -4110,6 +4097,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -4144,9 +4132,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -4233,7 +4220,6 @@ data: { "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -4308,7 +4294,6 @@ data: { "expr": "sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, 
"legendFormat": "{{reason}}", "legendLink": null } @@ -4519,21 +4504,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p99", "legendLink": null }, { "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p90", "legendLink": null }, { "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p50", "legendLink": null } @@ -4620,21 +4602,18 @@ data: { "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p50", "legendLink": null }, { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p99", "legendLink": null }, { "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "avg", "legendLink": null } @@ -4697,7 +4676,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -4863,7 +4842,6 @@ data: "expr": "sum(loki_compactor_pending_delete_requests_count{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": 
"time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -4939,7 +4917,6 @@ data: "expr": "max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -5025,7 +5002,6 @@ data: { "expr": "(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on() vector(0)) - on () (loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on () vector(0))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "in progress", "legendLink": null } @@ -5100,7 +5076,6 @@ data: { "expr": "sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "received", "legendLink": null } @@ -5175,7 +5150,6 @@ data: { "expr": "sum(increase(loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "processed", "legendLink": null } @@ -5262,7 +5236,6 @@ data: { "expr": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -5337,7 +5310,6 @@ data: { "expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / 1024 / 1024 ", "format": "time_series", - "intervalFactor": 2, "legendFormat": " {{pod}} ", "legendLink": null } @@ -5412,7 +5384,6 @@ data: { "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ 
-5499,7 +5470,6 @@ data: { "expr": "sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "failures", "legendLink": null } @@ -5574,7 +5544,6 @@ data: { "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/compactor\"}[$__rate_interval])) by (user)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{user}}", "legendLink": null } @@ -5675,7 +5644,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -5856,7 +5825,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5943,7 +5912,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6029,7 +5998,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6115,7 +6084,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6201,7 +6170,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6287,7 +6256,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6374,7 +6343,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6461,7 +6430,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6566,7 +6535,7 @@ data: "sort": 2, 
"value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6640,7 +6609,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -7463,7 +7432,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -7700,7 +7669,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -7796,7 +7765,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -7844,15 +7813,16 @@ data: "gridPos": { "h": 5, "w": 4, - "x": 12, + "x": 8, "y": 1 }, "hiddenSeries": false, - "id": 2, - "interval": "", + "id": 11, "legend": { "avg": false, "current": false, + "hideEmpty": false, + "hideZero": false, "max": false, "min": false, "show": false, @@ -7876,8 +7846,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))", - "legendFormat": "{{tenant}}", + "expr": "topk(5, sum by (name,level) (rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\"}[$__interval])) - \nsum by (name,level) (rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\"}[$__interval] offset 1h)))", + "legendFormat": "{{name}}-{{level}}", "refId": "A" } ], @@ -7885,13 +7855,13 @@ data: "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "Lines Per Tenant (top 10)", + "title": "Bad Words", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ 
-7939,16 +7909,15 @@ data: "gridPos": { "h": 5, "w": 4, - "x": 16, + "x": 12, "y": 1 }, "hiddenSeries": false, - "id": 4, + "id": 2, + "interval": "", "legend": { "avg": false, "current": false, - "hideEmpty": true, - "hideZero": true, "max": false, "min": false, "show": false, @@ -7972,7 +7941,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024", + "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))", "legendFormat": "{{tenant}}", "refId": "A" } @@ -7981,13 +7950,13 @@ data: "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "MBs Per Tenant (Top 10)", + "title": "Lines Per Tenant (top 10)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8035,14 +8004,16 @@ data: "gridPos": { "h": 5, "w": 4, - "x": 20, + "x": 16, "y": 1 }, "hiddenSeries": false, - "id": 24, + "id": 4, "legend": { "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "show": false, @@ -8066,24 +8037,22 @@ data: "steppedLine": false, "targets": [ { - "expr": "increase(kube_pod_container_status_restarts_total{cluster=\"$cluster\", namespace=\"$namespace\"}[10m]) > 0", - "hide": false, - "interval": "", - "legendFormat": "{{container}}-{{pod}}", - "refId": "B" + "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024", + "legendFormat": "{{tenant}}", + "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "Container Restarts", + "title": "MBs Per Tenant (Top 10)", "tooltip": { - "shared": true, - "sort": 2, + "shared": false, + "sort": 0, "value_type": "individual" }, 
- "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8129,21 +8098,19 @@ data: "fill": 1, "fillGradient": 0, "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 6 + "h": 5, + "w": 4, + "x": 20, + "y": 1 }, "hiddenSeries": false, - "id": 9, + "id": 24, "legend": { - "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": true, - "show": true, + "show": false, "total": false, "values": false }, @@ -8164,32 +8131,24 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", - "legendFormat": ".99", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.75, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", - "legendFormat": ".9", + "expr": "increase(kube_pod_container_status_restarts_total{cluster=\"$cluster\", namespace=\"$namespace\"}[10m]) > 0", + "hide": false, + "interval": "", + "legendFormat": "{{container}}-{{pod}}", "refId": "B" - }, - { - "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", - "legendFormat": ".5", - "refId": "C" } ], "thresholds": [ ], "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "Push Latency", + "title": "Container Restarts", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8235,13 +8194,13 @@ data: "fill": 1, "fillGradient": 0, "gridPos": { - "h": 5, - "w": 6, - "x": 12, + "h": 10, + "w": 12, + "x": 0, "y": 
6 }, "hiddenSeries": false, - "id": 12, + "id": 9, "legend": { "alignAsTable": true, "avg": false, @@ -8249,7 +8208,7 @@ data: "max": false, "min": false, "rightSide": true, - "show": false, + "show": true, "total": false, "values": false }, @@ -8270,17 +8229,17 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(0.9, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.75, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(0.5, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".5", "refId": "C" } @@ -8289,13 +8248,13 @@ data: "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "Distributor Latency", + "title": "Push Latency", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8326,104 +8285,6 @@ data: "alignLevel": null } }, - { - 
"aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { } - }, - "overrides": [ ] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 6, - "x": 18, - "y": 6 - }, - "hiddenSeries": false, - "id": 71, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [ ] - }, - "panels": [ ], - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (route) > 0", - "interval": "", - "legendFormat": "{{route}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeRegions": [ ], - "timeShift": null, - "title": "Distributor Success Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": "", - "logBase": 1, - "max": "1", - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, { "aliasColors": { }, "bars": false, @@ -8442,10 +8303,10 @@ data: "h": 5, "w": 6, "x": 12, - "y": 11 + 
"y": 6 }, "hiddenSeries": false, - "id": 13, + "id": 12, "legend": { "alignAsTable": true, "avg": false, @@ -8474,19 +8335,17 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(0.9, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", - "hide": false, + "expr": "histogram_quantile(0.9, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", - "hide": false, + "expr": "histogram_quantile(0.5, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".5", "refId": "C" } @@ -8495,13 +8354,219 @@ data: "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "Ingester Latency Write", + "title": "Distributor Latency", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 6 + }, + "hiddenSeries": false, + "id": 71, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Distributor Success Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + 
], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 11 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.9, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "hide": false, + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester.*\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "hide": false, + "legendFormat": ".5", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Ingester Latency Write", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8598,7 +8663,7 @@ data: 
"sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8707,7 +8772,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8813,7 +8878,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8910,7 +8975,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9018,7 +9083,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9115,7 +9180,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9216,7 +9281,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9413,7 +9478,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9505,7 +9570,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9597,7 +9662,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9714,7 +9779,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9803,7 +9868,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9892,7 +9957,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", 
@@ -9981,7 +10046,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10086,7 +10151,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10178,7 +10243,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10270,7 +10335,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10387,7 +10452,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10494,7 +10559,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10585,7 +10650,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10701,7 +10766,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10854,7 +10919,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -11026,7 +11091,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -11118,7 +11183,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -11210,7 +11275,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -11327,7 +11392,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": 
"timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -11447,7 +11512,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -11539,7 +11604,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -11659,7 +11724,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -11751,7 +11816,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -11865,7 +11930,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -11968,7 +12033,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -12071,7 +12136,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -12169,7 +12234,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -12259,7 +12324,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -12349,7 +12414,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -12439,7 +12504,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -12529,7 +12594,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -12649,7 +12714,7 @@ data: "sort": 2, 
"value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -12741,7 +12806,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -12829,7 +12894,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -12843,7 +12908,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -12915,7 +12980,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -12929,7 +12994,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -13001,7 +13066,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -13015,7 +13080,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -13087,7 +13152,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -13101,7 +13166,7 @@ data: "sort": 0, 
"value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -13173,17 +13238,17 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".5", "refId": "C" } @@ -13198,7 +13263,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -13273,19 +13338,19 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", 
namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" @@ -13301,7 +13366,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -13376,7 +13441,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" @@ -13392,7 +13457,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -13511,7 +13576,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -13602,7 +13667,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -13721,7 +13786,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -13812,7 +13877,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -13931,7 +13996,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", 
+ "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -14022,7 +14087,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -14074,7 +14139,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -14259,21 +14324,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -14357,21 +14419,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} > 0)", "format": "time_series", - "intervalFactor": 2, 
"legendFormat": "limit", "legendLink": null } @@ -14444,7 +14503,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -14540,21 +14598,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -14638,21 +14693,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -14725,7 +14777,6 @@ data: { "expr": "sum by(pod) 
(go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-scheduler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -14822,21 +14873,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -14921,21 +14969,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -15009,7 +15054,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/querier\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", 
"legendLink": null } @@ -15083,7 +15127,6 @@ data: { "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -15159,7 +15202,6 @@ data: { "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -15235,7 +15277,6 @@ data: { "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"querier.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null } @@ -15335,21 +15376,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", resource=\"cpu\"} > 0)", "format": "time_series", - 
"intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -15434,21 +15472,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -15522,7 +15557,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -15596,7 +15630,6 @@ data: { "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -15672,7 +15705,6 @@ data: { "expr": "sum by(instance, pod, device) 
(rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -15748,7 +15780,6 @@ data: { "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"index-gateway.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null } @@ -15847,21 +15878,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -15945,21 +15973,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", - 
"intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -16032,7 +16057,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.+\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -16116,9 +16140,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", + "expr": "sum by(pod) (loki_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"}) or sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -16205,21 +16228,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", 
container=~\"ruler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -16304,21 +16324,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -16392,7 +16409,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -16454,7 +16470,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -16591,6 +16607,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -16625,9 +16642,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": 
"sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -16702,7 +16718,6 @@ data: { "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 99th percentile", "refId": "A", "step": 10 @@ -16710,7 +16725,6 @@ data: { "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 50th percentile", "refId": "B", "step": 10 @@ -16718,7 +16732,6 @@ data: { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", 
route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", "refId": "C", "step": 10 @@ -16767,13 +16780,16 @@ data: "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -16864,6 +16880,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -16898,9 +16915,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", 
route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -16975,7 +16991,6 @@ data: { "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 99th percentile", "refId": "A", "step": 10 @@ -16983,7 +16998,6 @@ data: { "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 50th percentile", "refId": "B", "step": 10 @@ -16991,7 +17005,6 @@ data: { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", 
route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", "refId": "C", "step": 10 @@ -17040,13 +17053,16 @@ data: "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -17137,6 +17153,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -17171,9 +17188,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -17248,7 +17264,6 @@ data: { "expr": "histogram_quantile(0.99, sum by (le,route) 
(cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 99th percentile", "refId": "A", "step": 10 @@ -17256,7 +17271,6 @@ data: { "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 50th percentile", "refId": "B", "step": 10 @@ -17264,7 +17278,6 @@ data: { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", "refId": "C", "step": 10 @@ -17313,13 +17326,16 @@ data: "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -17410,6 +17426,7 @@ data: 
"3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -17444,9 +17461,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -17521,7 +17537,6 @@ data: { "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 99th percentile", "refId": "A", "step": 10 @@ -17529,7 +17544,6 @@ data: { "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", 
route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 50th percentile", "refId": "B", "step": 10 @@ -17537,7 +17551,6 @@ data: { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", "refId": "C", "step": 10 @@ -17586,13 +17599,16 @@ data: "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -17683,6 +17699,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -17717,9 +17734,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n 
label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -17794,21 +17810,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -17856,13 +17869,16 @@ data: "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -17953,6 +17969,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -17987,9 
+18004,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -18064,21 +18080,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", 
"refId": "C" } @@ -18126,13 +18139,16 @@ data: "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -18226,7 +18242,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -18401,21 +18417,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -18499,21 +18512,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": 
"min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -18586,7 +18596,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -18698,7 +18707,7 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ @@ -18712,7 +18721,7 @@ data: "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Last Compact and Mark Operation Success", + "title": "Last Compact Tables Operation Success", "tooltip": { "shared": true, "sort": 2, @@ -18772,14 +18781,13 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", "format": "time_series", - "intervalFactor": 2, "legendFormat": "duration", "legendLink": null } @@ -18787,7 +18795,7 @@ data: "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Compact and Mark Operations Duration", + "title": "Compact Tables Operations Duration", "tooltip": { "shared": true, "sort": 2, @@ -18819,88 +18827,13 @@ data: "show": false } ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ 
], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{success}}", - "legendLink": null - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Compact and Mark Operations Per Status", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Compact and Mark", + "title": "Compaction", "titleSize": "h6" }, { @@ -18913,8 +18846,8 @@ data: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 7, + "fill": 1, + "id": 6, "legend": { "avg": false, "current": false, @@ -18925,7 +18858,7 @@ data: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, @@ -18934,22 +18867,21 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"})", + "expr": "sum(increase(loki_compactor_skipped_compacting_locked_table_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__range]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{action}}", + 
"legendFormat": "{{table_name}}", "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Processed Tables Per Action", + "title": "Number of times Tables were skipped during Compaction", "tooltip": { "shared": true, "sort": 2, @@ -18988,8 +18920,8 @@ data: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 8, + "fill": 1, + "id": 7, "legend": { "avg": false, "current": false, @@ -19000,7 +18932,7 @@ data: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, @@ -19009,22 +18941,21 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "count by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\" , action=~\"modified|deleted\"})", + "expr": "sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{table}}-{{action}}", + "legendFormat": "{{success}}", "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Modified Tables", + "title": "Compact Tables Operations Per Status", "tooltip": { "shared": true, "sort": 2, @@ -19056,15 +18987,46 @@ data: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 9, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "custom": { }, + "thresholds": { + "mode": "absolute", 
+ "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "dateTimeFromNow" + } + }, + "fill": 1, + "id": 8, "legend": { "avg": false, "current": false, @@ -19075,9 +19037,24 @@ data: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { }, + "textMode": "auto" + }, "percentage": false, "pointradius": 5, "points": false, @@ -19085,27 +19062,26 @@ data: "seriesOverrides": [ ], "spaceLength": 10, "span": 4, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >0", + "expr": "loki_compactor_apply_retention_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{table}}", - "legendLink": null + "instant": true, + "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Marks Creation Rate Per Table", + "title": "Last Mark Operation Success", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "stat", "xaxis": { "buckets": null, "mode": "time", @@ -19131,19 +19107,7 @@ data: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Per Table Marker", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { }, "bars": false, @@ -19151,8 +19115,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "short", - "id": 10, + "id": 9, "legend": { "avg": false, "current": false, @@ -19172,28 
+19135,27 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum (increase(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", + "expr": "loki_compactor_apply_retention_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" + "legendFormat": "duration", + "legendLink": null } ], - "thresholds": "70,80", + "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Marked Chunks (24h)", + "title": "Mark Operations Duration", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -19203,7 +19165,7 @@ data: }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -19227,7 +19189,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 11, + "id": 10, "legend": { "avg": false, "current": false, @@ -19247,36 +19209,21 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "sum by (status)(rate(loki_compactor_apply_retention_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", 
- "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" + "legendFormat": "{{success}}", + "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Mark Table Latency", + "title": "Mark Operations Per Status", "tooltip": { "shared": true, "sort": 2, @@ -19292,7 +19239,7 @@ data: }, "yaxes": [ { - "format": "ms", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -19314,7 +19261,7 @@ data: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "", + "title": "Retention", "titleSize": "h6" }, { @@ -19327,9 +19274,8 @@ data: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "format": "short", - "id": 12, + "fill": 10, + "id": 11, "legend": { "avg": false, "current": false, @@ -19340,7 +19286,7 @@ data: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, @@ -19349,28 +19295,27 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 4, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", + "expr": "count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" 
+ "legendFormat": "{{action}}", + "legendLink": null } ], - "thresholds": "70,80", + "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Delete Chunks (24h)", + "title": "Processed Tables Per Action", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -19403,8 +19348,8 @@ data: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 13, + "fill": 10, + "id": 12, "legend": { "avg": false, "current": false, @@ -19415,7 +19360,7 @@ data: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, @@ -19424,36 +19369,21 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 4, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "expr": "count 
by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\" , action=~\"modified|deleted\"})", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C" + "legendFormat": "{{table}}-{{action}}", + "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Delete Latency", + "title": "Modified Tables", "tooltip": { "shared": true, "sort": 2, @@ -19469,7 +19399,7 @@ data: }, "yaxes": [ { - "format": "ms", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -19485,27 +19415,15 @@ data: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Sweeper", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 14, + "fill": 10, + "id": 13, "legend": { "avg": false, "current": false, @@ -19516,7 +19434,7 @@ data: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, @@ -19526,21 +19444,20 @@ data: "seriesOverrides": [ ], "spaceLength": 10, "span": 4, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "time() - (loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time{cluster=~\"$cluster\", namespace=~\"$namespace\"} > 0)", + "expr": "sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >0", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "lag", + "legendFormat": "{{table}}", "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Sweeper Lag", + "title": "Marks Creation Rate Per Table", "tooltip": { "shared": true, "sort": 2, @@ -19556,11 
+19473,11 @@ data: }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -19572,7 +19489,19 @@ data: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Per Table Marker", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { }, "bars": false, @@ -19580,7 +19509,8 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 15, + "format": "short", + "id": 14, "legend": { "avg": false, "current": false, @@ -19600,28 +19530,27 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{cluster=~\"$cluster\", namespace=~\"$namespace\"})", + "expr": "sum (increase(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "count", - "legendLink": null + "instant": true, + "refId": "A" } ], - "thresholds": [ ], + "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "Marks Files to Process", + "title": "Marked Chunks (24h)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "singlestat", "xaxis": { "buckets": null, "mode": "time", @@ -19655,7 +19584,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 16, + "id": 15, "legend": { "avg": false, "current": false, @@ -19675,22 +19604,33 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\"}[$__rate_interval]))", + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{status}}", - "legendLink": null + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Delete Rate Per Status", + "title": "Mark Table Latency", "tooltip": { "shared": true, "sort": 2, @@ -19706,7 +19646,7 @@ data: }, "yaxes": [ { - "format": "short", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -19736,8 +19676,415 @@ data: "height": "250px", "panels": [ { - "datasource": "$loki_datasource", + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Delete Chunks (24h)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + 
"legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Sweeper", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "time() - (loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time{cluster=~\"$cluster\", namespace=~\"$namespace\"} > 0)", + "format": "time_series", + "legendFormat": "lag", + "legendLink": null + } + ], + 
"thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Sweeper Lag", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{cluster=~\"$cluster\", namespace=~\"$namespace\"})", + "format": "time_series", + "legendFormat": "count", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Marks Files to Process", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 20, + "legend": { + "avg": 
false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{status}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Rate Per Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$loki_datasource", + "id": 21, "span": 12, "targets": [ { @@ -19770,7 +20117,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -19955,21 +20302,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": 
"min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -20053,21 +20397,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -20140,7 +20481,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -20226,7 +20566,6 @@ data: { "expr": "sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -20311,21 +20650,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", "format": 
"time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -20410,21 +20746,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -20498,7 +20831,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -20572,7 +20904,6 @@ data: { "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", 
\"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -20648,7 +20979,6 @@ data: { "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -20724,7 +21054,6 @@ data: { "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"ingester.*.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null } @@ -20788,7 +21117,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -20925,6 +21254,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -20959,9 +21289,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n 
label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -21036,7 +21365,6 @@ data: { "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th percentile", "refId": "A", "step": 10 @@ -21044,7 +21372,6 @@ data: { "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th percentile", "refId": "B", "step": 10 @@ -21052,7 +21379,6 @@ data: { "expr": "1e3 * sum(cluster_job:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}) / sum(cluster_job:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C", "step": 10 @@ -21107,21 +21433,12 @@ data: "height": "250px", "panels": [ { - "aliasColors": { - "1xx": "#EAB839", - "2xx": "#7EB26D", - "3xx": "#6ED0E0", - "4xx": "#EF843C", - "5xx": "#E24D42", - "cancel": "#A9A9A9", - "error": "#E24D42", - "success": "#7EB26D" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, + "fill": 1, "id": 3, "legend": { "avg": false, @@ -21133,7 +21450,7 @@ data: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], "nullPointMode": "null 
as zero", "percentage": false, @@ -21143,21 +21460,20 @@ data: "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval])) / sum(rate(loki_distributor_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{status}}", - "refId": "A" + "legendFormat": "bytes", + "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "QPS", + "title": "Per Total Received Bytes", "tooltip": { "shared": true, "sort": 2, @@ -21218,38 +21534,20 @@ data: "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th percentile", - "refId": "A", - "step": 10 - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", + "expr": "sum by (tenant) (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval])) / 
ignoring(tenant) group_left sum(rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th percentile", - "refId": "B", - "step": 10 - }, - { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average", - "refId": "C", - "step": 10 + "legendFormat": "{{tenant}}", + "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Latency", + "title": "Per Tenant", "tooltip": { "shared": true, "sort": 2, @@ -21265,10 +21563,10 @@ data: }, "yaxes": [ { - "format": "ms", + "format": "short", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, @@ -21276,7 +21574,7 @@ data: "format": "short", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": null, "show": false } @@ -21287,7 +21585,7 @@ data: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Ingester - Zone Aware", + "title": "Distributor - Structured Metadata", "titleSize": "h6" }, { @@ -21301,6 +21599,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -21335,9 +21634,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", 
\"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -21410,25 +21708,22 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th percentile", "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th percentile", "refId": "B", "step": 10 }, { - "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})", + "expr": "1e3 * 
sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C", "step": 10 @@ -21475,7 +21770,7 @@ data: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Ingester", + "title": "Ingester - Zone Aware", "titleSize": "h6" }, { @@ -21489,6 +21784,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -21523,9 +21819,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -21598,25 +21893,25 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99th Percentile", - "refId": "A" + "legendFormat": "99th percentile", + "refId": "A", + "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50th Percentile", - "refId": "B" + "legendFormat": "50th percentile", + "refId": "B", + "step": 10 }, { - "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]))", + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", - "refId": "C" + "refId": "C", + "step": 10 } ], "thresholds": [ ], @@ -21660,7 +21955,7 @@ data: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Index", + "title": "Ingester", "titleSize": "h6" }, { @@ -21674,6 +21969,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -21708,9 +22004,8 @@ data: 
"steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -21783,23 +22078,20 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": 
"sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]))", + "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -21845,148 +22137,31 @@ data: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "BoltDB Shipper", + "title": "Index", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data Source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": 
"", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Writes", - "uid": "writes", - "version": 0 - } -kind: ConfigMap -metadata: - annotations: - grafana_dashboard_folder: /dashboards/Loki Mixin - labels: - grafana_dashboard: "1" - name: loki-writes.json - namespace: monitoring-system ---- -apiVersion: v1 -data: - memcached-overview.json: | - { - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ ], - "refresh": "10s", - "rows": [ + }, { "collapse": false, "height": "250px", "panels": [ { - "aliasColors": { }, + "aliasColors": { + "1xx": "#EAB839", + "2xx": "#7EB26D", + "3xx": "#6ED0E0", + "4xx": "#EF843C", + "5xx": "#E24D42", + "OK": "#7EB26D", + "cancel": "#A9A9A9", + "error": "#E24D42", + "success": "#7EB26D" + }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 1, + "fill": 10, + "id": 11, "legend": { "avg": false, "current": false, @@ -21997,7 +22172,7 @@ data: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], "nullPointMode": "null as zero", "percentage": false, @@ -22007,20 +22182,20 @@ data: "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(memcached_commands_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\", command=\"get\", status=\"hit\"}[$__rate_interval])) / sum(rate(memcached_commands_total{cluster=~\"$cluster\", job=~\"$job\", command=\"get\"}[$__rate_interval]))", + "expr": "sum by 
(status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "legendFormat": "Hit Rate", - "legendLink": null + "legendFormat": "{{status}}", + "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Hit Rate", + "title": "QPS", "tooltip": { "shared": true, "sort": 2, @@ -22036,7 +22211,7 @@ data: }, "yaxes": [ { - "format": "percentunit", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -22060,7 +22235,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 2, + "id": 12, "legend": { "avg": false, "current": false, @@ -22085,16 +22260,28 @@ data: "steppedLine": false, "targets": [ { - "expr": "topk(20,\n max by (cluster, job, instance) (\n memcached_current_connections{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"} / memcached_max_connections{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}\n))\n", + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "legendFormat": "{{ cluster }} / {{ job }} / {{ instance }}", - "legendLink": null + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", 
operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Top 20 Highest Connection Usage", + "title": "Latency", "tooltip": { "shared": true, "sort": 2, @@ -22110,7 +22297,7 @@ data: }, "yaxes": [ { - "format": "percentunit", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -22132,9 +22319,136 @@ data: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Hits", + "title": "BoltDB Shipper", "titleSize": "h6" - }, + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": 
"now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Writes", + "uid": "writes", + "version": 0 + } +kind: ConfigMap +metadata: + annotations: + grafana_dashboard_folder: /dashboards/Loki Mixin + labels: + grafana_dashboard: "1" + name: loki-writes.json + namespace: monitoring-system +--- +apiVersion: v1 +data: + memcached-overview.json: | + { + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ ], + "refresh": "10s", + "rows": [ { "collapse": false, "height": "250px", @@ -22146,7 +22460,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 3, + "id": 1, "legend": { "avg": false, "current": false, @@ -22166,21 +22480,21 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by(command, status) (rate(memcached_commands_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(rate(memcached_commands_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\", command=\"get\", status=\"hit\"}[$__rate_interval])) / sum(rate(memcached_commands_total{cluster=~\"$cluster\", job=~\"$job\", command=\"get\"}[$__rate_interval]))", "format": "time_series", - "legendFormat": "{{command}} {{status}}", + "legendFormat": "Hit Rate", "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Commands", + "title": "Hit Rate", "tooltip": { "shared": true, "sort": 2, @@ -22196,7 +22510,7 @@ data: }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -22220,7 +22534,7 @@ data: "dashes": 
false, "datasource": "$datasource", "fill": 1, - "id": 4, + "id": 2, "legend": { "avg": false, "current": false, @@ -22240,21 +22554,21 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(memcached_items_evicted_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "topk(20,\n max by (cluster, job, instance) (\n memcached_current_connections{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"} / memcached_max_connections{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}\n))\n", "format": "time_series", - "legendFormat": "{{instance}}", + "legendFormat": "{{ cluster }} / {{ job }} / {{ instance }}", "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Evictions", + "title": "Top 20 Highest Connection Usage", "tooltip": { "shared": true, "sort": 2, @@ -22270,7 +22584,7 @@ data: }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -22286,7 +22600,19 @@ data: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Hits", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { }, "bars": false, @@ -22294,7 +22620,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 5, + "id": 3, "legend": { "avg": false, "current": false, @@ -22319,16 +22645,16 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(memcached_items_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum by(command, status) (rate(memcached_commands_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "legendFormat": 
"{{instance}}", + "legendFormat": "{{command}} {{status}}", "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Stored", + "title": "Commands", "tooltip": { "shared": true, "sort": 2, @@ -22360,27 +22686,15 @@ data: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ops", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 6, + "fill": 1, + "id": 4, "legend": { "avg": false, "current": false, @@ -22391,7 +22705,7 @@ data: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], "nullPointMode": "null as zero", "percentage": false, @@ -22401,11 +22715,11 @@ data: "seriesOverrides": [ ], "spaceLength": 10, "span": 4, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by (instance) (\n rate(memcached_process_user_cpu_seconds_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) +\n rate(memcached_process_system_cpu_seconds_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n)\n", + "expr": "sum by(instance) (rate(memcached_items_evicted_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{instance}}", "legendLink": null @@ -22414,7 +22728,7 @@ data: "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "CPU", + "title": "Evictions", "tooltip": { "shared": true, "sort": 2, @@ -22453,8 +22767,8 @@ data: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 7, + "fill": 1, + "id": 5, "legend": { "avg": false, "current": false, @@ -22465,7 +22779,7 @@ data: "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], 
"nullPointMode": "null as zero", "percentage": false, @@ -22475,11 +22789,11 @@ data: "seriesOverrides": [ ], "spaceLength": 10, "span": 4, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (memcached_current_bytes{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", + "expr": "sum by(instance) (rate(memcached_items_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{instance}}", "legendLink": null @@ -22488,7 +22802,7 @@ data: "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Memory", + "title": "Stored", "tooltip": { "shared": true, "sort": 2, @@ -22504,7 +22818,7 @@ data: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -22520,7 +22834,19 @@ data: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ops", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { }, "bars": false, @@ -22528,7 +22854,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 8, + "id": 6, "legend": { "avg": false, "current": false, @@ -22553,7 +22879,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (memcached_current_items{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", + "expr": "sum by (instance) (\n rate(memcached_process_user_cpu_seconds_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) +\n rate(memcached_process_system_cpu_seconds_total{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n)\n", "format": "time_series", "legendFormat": "{{instance}}", "legendLink": null @@ -22562,7 +22888,155 @@ data: "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Items", + "title": "CPU", + "tooltip": { + 
"shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (memcached_current_bytes{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + 
"lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (memcached_current_items{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "legendFormat": "{{instance}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Items", "tooltip": { "shared": true, "sort": 2, @@ -23980,7 +24454,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -26638,7 +27112,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -27575,7 +28049,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -29821,7 +30295,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -30143,7 +30617,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -31071,7 +31545,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -31332,7 +31806,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -32477,7 +32951,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": 
[ ], "query": "prometheus", @@ -33789,7 +34263,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -33974,7 +34448,7 @@ data: "uid": "$datasource" }, "exemplar": false, - "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", "instant": false, "legendFormat": "Writes", "range": true @@ -34108,7 +34582,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", 
route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -34182,21 +34656,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "99th percentile", "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", 
route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B", "step": 10 }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", "format": "time_series", "legendFormat": "Average", "refId": "C", @@ -34271,13 +34745,13 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "samples / sec", "legendLink": null }, { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": 
"sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "exemplars / sec", "legendLink": null @@ -35248,7 +35722,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -38397,7 +38871,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -40503,7 +40977,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -43117,7 +43591,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -47472,7 +47946,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -48468,7 +48942,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -49377,7 +49851,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -49725,7 +50199,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": 
"sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -49837,7 +50311,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -49945,7 +50419,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", 
route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -50057,7 +50531,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", "format": null, "instant": false, "interval": "", @@ -50736,12 +51210,15 @@ data: "type": "table" }, { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": "$datasource", - "fill": 1, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "percentunit" + } + }, "gridPos": { "h": 8, "w": 8, @@ -50749,31 +51226,19 @@ data: "y": 8 }, "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, + "options": { + "legend": 
{ + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, "targets": [ { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", "format": "time_series", "legendFormat": "writes", "legendLink": null @@ -50785,41 +51250,8 @@ data: "legendLink": null } ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, "title": "Latency vs 24h ago", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "type": "timeseries" } ], "refresh": "10s", @@ -50837,7 +51269,7 @@ data: "value": "default" }, 
"hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -53401,7 +53833,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -53774,7 +54206,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -53915,165 +54347,1325 @@ data: "height": "250px", "panels": [ { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${lokidatasource}", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Time range" - }, - "properties": [ - { - "id": "mappings", - "value": [ - { - "from": "", - "id": 1, - "text": "Instant query", - "to": "", - "type": 1, - "value": "0" - } - ] - }, - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Step" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - } - ] - }, + "fill": 1, "id": 1, - "span": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | response_time > ${min_duration}", - "instant": false, - "legendFormat": "", - "range": true, - "refId": "A" + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | 
logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null } ], - "title": "Slow queries", - "transformations": [ + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Response time", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ { - "id": "extractFields", - "options": { - "source": "labels" - } + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "id": "calculateField", - "options": { - "alias": "Time range", - "binary": { - "left": "param_end", - "operator": "-", - "reducer": "sum", - "right": "param_start" - }, - "mode": "binary", - "reduce": { - "reducer": "sum" - }, - "replaceFields": false - } + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], 
+ "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null }, { - "id": "organize", - "options": { - "excludeByName": { - "Line": true, - "Time": true, - "caller": true, - "cluster": true, - "container": true, - "host": true, - "id": true, - "job": true, - "labels": true, - "level": true, - "line": true, - "method": true, - "msg": true, - "name": true, - "namespace": true, - "param_end": true, - "param_start": true, - "param_time": true, - "path": true, - "pod": true, - "pod_template_hash": true, - "query_wall_time_seconds": true, - "stream": true, - "traceID": true, - "tsNs": true - }, - "indexByName": { - "Time range": 3, - "param_query": 2, - "param_step": 4, - "response_time": 5, - "ts": 0, - "user": 1 - }, - "renameByName": { - "org_id": "Tenant ID", - "param_query": "Query", - "param_step": "Step", - "response_time": "Duration" - } - } + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null } ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" + "thresholds": [ ], + "timeFrom": 
null, + "timeShift": null, + "title": "Fetched series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, - "hide": 0, - "label": "Data Source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + 
"legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Fetched chunks", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Response size", + "tooltip": { + 
"shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Time span", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": 
"time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Accross tenants", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 response time", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false 
+ } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 fetched series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + 
"spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 fetched chunks", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], 
+ "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 response size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 time span", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + 
"min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 tenants", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 response time", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": 
false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 fetched series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= 
\"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 fetched chunks", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 response size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", 
+ "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 time span", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 User-Agents", + "titleSize": "h6" 
+ }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${lokidatasource}", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time range" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "from": "", + "id": 1, + "text": "Instant query", + "to": "", + "type": 1, + "value": "0" + } + ] + }, + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Step" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + } + ] + }, + "height": "500px", + "id": 16, + "span": 12, + "targets": [ + { + "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration}", + "instant": false, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Slow queries", + "transformations": [ + { + "id": "extractFields", + "options": { + "source": "labels" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Line": true, + "Time": true, + "caller": true, + "cluster": true, + "container": true, + "host": true, + "id": true, + "job": true, + "labels": true, + "level": true, + "line": true, + "method": true, + "msg": true, + "name": true, + "namespace": true, + "path": true, + "pod": true, + "pod_template_hash": true, + "query_wall_time_seconds": true, + "stream": true, + "traceID": true, + "tsNs": true + }, + "indexByName": { + "length": 2, + "param_end": 4, + "param_query": 7, + "param_start": 3, + "param_step": 6, + "param_time": 5, + "response_time": 8, + "ts": 0, + "user": 1 + }, + "renameByName": { + "org_id": "Tenant ID", + "param_query": "Query", + "param_step": "Step", + "response_time": "Duration" + } + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": 
"h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", "current": { "text": "prod", "value": "prod" @@ -54164,6 +55756,25 @@ data: ], "query": ".*", "type": "textbox" + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "label": "User-Agent HTTP Header", + "name": "user_agent", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" } ] }, @@ -54311,7 +55922,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "in-memory", "legendLink": null @@ -54323,13 +55934,13 @@ data: 
"legendLink": null }, { - "expr": "sum(\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "active", "legendLink": null }, { - "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n) > 0\n", + "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", "format": "time_series", "legendFormat": "active ({{ name }})", "legendLink": null @@ -54410,13 +56021,13 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) 
(cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "active", "legendLink": null }, { - "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n) > 0\n", + "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", "format": "time_series", "legendFormat": "active ({{ name }})", "legendLink": null @@ -54497,13 +56108,13 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n 
cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "buckets", "legendLink": null }, { - "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n) > 0\n", + "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", "format": "time_series", "legendFormat": "buckets ({{ name }})", "legendLink": null @@ -54584,7 +56195,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) 
group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "series", "legendLink": null @@ -54653,7 +56264,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", + "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", "format": "time_series", "legendFormat": "age", "legendLink": null @@ -54803,7 +56414,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -54884,7 +56495,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -54965,7 +56576,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{ reason }}", "legendLink": null @@ -55046,7 +56657,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -55127,7 +56738,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -55208,13 +56819,13 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "deduplicated", "legendLink": null }, { - "expr": 
"sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "non-HA", "legendLink": null @@ -55289,7 +56900,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{ reason }} (distributor)", "legendLink": null @@ -55376,7 +56987,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -55445,7 +57056,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": 
"rate", "legendLink": null @@ -55520,7 +57131,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{ reason }}", "legendLink": null @@ -55589,7 +57200,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -56532,6 +58143,261 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (user) (cortex_alertmanager_alerts{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "alerts", + "legendLink": null + }, + { + "expr": "sum by (user) 
(cortex_alertmanager_silences{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "silences", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Alerts", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "failed": "#E24D42", + "successful": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 30, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))\n-\non() (sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) or on () vector(0))\n) > 0\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "NPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)\n-\n(sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) or\n (sum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) * 0)\n)) > 0\n", + "format": "time_series", + "legendFormat": "success - {{ integration }}", + "legendLink": 
null + }, + { + "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)", + "format": "time_series", + "legendFormat": "failed - {{ integration }}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "NPS by integration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, @@ -56586,7 +58452,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 30, + "id": 33, "legend": { "avg": false, "current": false, @@ -56672,7 +58538,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 31, + "id": 34, "legend": { "avg": false, "current": false, @@ -56746,7 +58612,7 @@ data: "dashes": false, "datasource": "$datasource", 
"fill": 1, - "id": 32, + "id": 35, "legend": { "avg": false, "current": false, @@ -56835,7 +58701,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -57130,7 +58996,7 @@ data: ], "targets": [ { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n )\n)\n", + "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", "format": "table", "instant": true, "legendFormat": "", @@ -57256,7 +59122,7 @@ data: ], "targets": [ { - "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"} )\n)\n)", + "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, 
namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n)", "format": "table", "instant": true, "legendFormat": "", @@ -57344,7 +59210,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", + "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) 
(cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -57468,7 +59334,7 @@ data: ], "targets": [ { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[5m])))", + "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", "format": "table", "instant": true, "legendFormat": "", @@ -57556,7 +59422,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", + "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null 
@@ -57680,7 +59546,7 @@ data: ], "targets": [ { - "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor|cortex|mimir|mimir-write.*))\"}[5m])))", + "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", "format": "table", "instant": true, "legendFormat": "", @@ -57768,7 +59634,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", + "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ 
-57892,7 +59758,7 @@ data: ], "targets": [ { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n )\n)\n", + "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", "format": "table", "instant": true, "legendFormat": "", @@ -58018,7 +59884,7 @@ data: ], "targets": [ { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[5m])))", + "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", "format": "table", "instant": true, "legendFormat": "", @@ -58336,7 +60202,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -59493,7 +61359,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -60854,7 +62720,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -61048,7 +62914,7 @@ data: "steppedLine": false, "targets": [ { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "instant": true, "refId": "A" @@ -61124,7 +62990,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "instant": true, "refId": "A" @@ -61200,7 +63066,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}))\n", + "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", "format": "time_series", "instant": true, "refId": "A" @@ -61276,7 +63142,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) 
(cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}))\n", + "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", "format": "time_series", "instant": true, "refId": "A" @@ -61448,7 +63314,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -61522,21 +63388,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) 
(cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "99th percentile", "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B", "step": 10 }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", 
route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", "format": "time_series", "legendFormat": "Average", "refId": "C", @@ -61619,7 +63485,7 @@ data: "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", "format": "time_series", "legendFormat": "", "legendLink": null @@ -61917,7 +63783,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -61991,19 +63857,19 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -62099,7 +63965,7 @@ data: "steppedLine": false, "targets": [ { 
- "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -62173,19 +64039,19 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by 
(le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -63115,7 +64981,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "incoming exemplars", "legendLink": null @@ -63190,7 +65056,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "received exemplars", "legendLink": null @@ 
-63265,7 +65131,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "ingested exemplars", "legendLink": null @@ -63340,7 +65206,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "appended exemplars", "legendLink": null @@ -63426,7 +65292,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{reason}}", "legendLink": null @@ -63564,7 +65430,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -63738,7 +65604,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -63813,7 +65679,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -63822,7 +65688,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, 
sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -63831,7 +65697,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (route)", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (route)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -63930,7 +65796,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -64005,7 
+65871,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64014,7 +65880,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64023,7 +65889,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (route)", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (route)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64122,7 +65988,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\", route=~\"querier_api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"querier_api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -64197,7 +66063,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64206,7 +66072,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64215,7 +66081,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route)", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64314,7 +66180,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -64389,7 +66255,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (le,endpoint)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (le,endpoint)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64398,7 +66264,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\"}[$__rate_interval])) by (le,endpoint)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (le,endpoint)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64407,7 +66273,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_querier_external_endpoint_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (endpoint) * 1e3 / sum(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (endpoint)", + "expr": "sum(rate(tempo_querier_external_endpoint_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (endpoint) * 1e3 / sum(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (endpoint)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64506,7 +66372,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"/tempopb.Querier/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/tempopb.Querier/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -64581,7 +66447,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, 
sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64590,7 +66456,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64599,7 +66465,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route)", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64698,7 +66564,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -64773,7 +66639,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64782,7 +66648,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64791,7 +66657,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64890,7 +66756,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -64965,7 +66831,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", "format": 
"time_series", "interval": "1m", "intervalFactor": 2, @@ -64974,7 +66840,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -64983,7 +66849,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -65050,7 +66916,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -65225,21 +67091,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - 
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -65325,21 +67191,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -65414,7 +67280,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -65512,21 +67378,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -65612,21 +67478,21 @@ data: "steppedLine": false, "targets": [ { - 
"expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -65701,7 +67567,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -65799,21 +67665,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": 
"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -65899,21 +67765,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\", resource=\"memory\"} > 0)", "format": "time_series", 
"interval": "1m", "legendFormat": "request", @@ -65988,7 +67854,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -66086,21 +67952,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -66186,21 +68052,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -66275,7 +68141,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/metrics-generator\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -66373,21 +68239,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -66473,21 +68339,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", 
"legendFormat": "request", @@ -66562,7 +68428,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -66660,21 +68526,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -66760,21 +68626,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) 
(container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -66849,7 +68715,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/querier\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -66947,21 +68813,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": 
"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -67047,21 +68913,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -67136,7 +69002,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -67201,7 +69067,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -67515,7 +69381,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -67627,7 +69493,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", 
route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -67735,7 +69601,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -67847,7 +69713,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))\n", + "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))\n", "format": null, "instant": false, "interval": "", @@ -67951,7 +69817,7 @@ data: "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -68063,7 +69929,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -68171,7 +70037,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -68283,7 +70149,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}))\n", + "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}))\n", "format": null, "instant": false, "interval": "", @@ -68558,14 +70424,14 @@ data: "steppedLine": false, "targets": [ { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))[1h:])\n)\n", + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))[1h:])\n)\n", "format": "time_series", "interval": "1m", "legendFormat": "writes", "legendLink": null }, { - "expr": "1 - (\n 
avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}))[1h:])\n)\n", + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}))[1h:])\n)\n", "format": "time_series", "interval": "1m", "legendFormat": "reads", @@ -68624,7 +70490,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -68807,7 +70673,7 @@ data: ], "targets": [ { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})\n) by (limit_name)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})\n) by (limit_name)\n", "format": "table", "instant": true, "legendFormat": "", @@ -68906,21 +70772,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_distributor_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"}[$__rate_interval]))", + "expr": 
"sum(rate(tempo_distributor_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/distributor\",tenant=\"$tenant\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "received", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"ingestion_rate_limit_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"ingestion_rate_limit_bytes\"})\n) by (ingestion_rate_limit_bytes)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"ingestion_rate_limit_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"ingestion_rate_limit_bytes\"})\n) by (ingestion_rate_limit_bytes)\n", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"ingestion_burst_size_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"ingestion_burst_size_bytes\"})\n) by (ingestion_burst_size_bytes)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"ingestion_burst_size_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"ingestion_burst_size_bytes\"})\n) by (ingestion_burst_size_bytes)\n", "format": "time_series", "interval": "1m", "legendFormat": 
"burst limit", @@ -68995,14 +70861,14 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_distributor_spans_received_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_distributor_spans_received_total{cluster=~\"$cluster\", job=~\"($namespace)/distributor\",tenant=\"$tenant\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "accepted", "legendLink": null }, { - "expr": "sum(rate(tempo_discarded_spans_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"}[$__rate_interval])) by (reason)", + "expr": "sum(rate(tempo_discarded_spans_total{cluster=~\"$cluster\", job=~\"($namespace)/distributor\",tenant=\"$tenant\"}[$__rate_interval])) by (reason)", "format": "time_series", "interval": "1m", "legendFormat": "refused {{ reason }}", @@ -69088,21 +70954,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "max(tempo_ingester_live_traces{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"})", + "expr": "max(tempo_ingester_live_traces{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",tenant=\"$tenant\"})", "format": "time_series", "interval": "1m", "legendFormat": "live traces", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"max_global_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"max_global_traces_per_user\"})\n) by (max_global_traces_per_user)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"max_global_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", 
job=~\"($namespace)/compactor\",limit_name=\"max_global_traces_per_user\"})\n) by (max_global_traces_per_user)\n", "format": "time_series", "interval": "1m", "legendFormat": "global limit", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"max_local_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"max_local_traces_per_user\"})\n) by (max_local_traces_per_user)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"max_local_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"max_local_traces_per_user\"})\n) by (max_local_traces_per_user)\n", "format": "time_series", "interval": "1m", "legendFormat": "local limit", @@ -69189,7 +71055,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\",op=\"traces\"}[$__rate_interval])) by (status)", + "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",tenant=\"$tenant\",op=\"traces\"}[$__rate_interval])) by (status)", "format": "time_series", "interval": "1m", "legendFormat": "{{ status }}", @@ -69264,7 +71130,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\",op=\"search\"}[$__rate_interval])) by (status)", + "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",tenant=\"$tenant\",op=\"search\"}[$__rate_interval])) by 
(status)", "format": "time_series", "interval": "1m", "legendFormat": "{{ status }}", @@ -69345,7 +71211,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "avg(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"})", + "expr": "avg(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",tenant=\"$tenant\"})", "format": "time_series", "interval": "1m", "legendFormat": "length", @@ -69414,7 +71280,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(tempodb_compaction_outstanding_blocks{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"})\n/\ncount(tempo_build_info{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})\n", + "expr": "sum(tempodb_compaction_outstanding_blocks{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",tenant=\"$tenant\"})\n/\ncount(tempo_build_info{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})\n", "format": "time_series", "interval": "1m", "legendFormat": "blocks", @@ -69495,7 +71361,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_metrics_generator_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_metrics_generator_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/metrics-generator\",tenant=\"$tenant\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "rate", @@ -69576,14 +71442,14 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(tempo_metrics_generator_registry_active_series{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"})", + "expr": "sum(tempo_metrics_generator_registry_active_series{cluster=~\"$cluster\", job=~\"($namespace)/metrics-generator\",tenant=\"$tenant\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{ tenant }}", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) 
(tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"metrics_generator_max_active_series\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"metrics_generator_max_active_series\"})\n) by (metrics_generator_max_active_series)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"metrics_generator_max_active_series\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"metrics_generator_max_active_series\"})\n) by (metrics_generator_max_active_series)\n", "format": "time_series", "interval": "1m", "legendFormat": "limit", @@ -69648,7 +71514,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -69717,7 +71583,7 @@ data: "multi": false, "name": "tenant", "options": [ ], - "query": "label_values(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}, tenant)", + "query": "label_values(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"}, tenant)", "refresh": 1, "regex": "", "sort": 2, @@ -69845,7 +71711,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", 
route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -69920,7 +71786,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -69929,7 +71795,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -69938,7 +71804,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by ()", + "expr": 
"sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70027,7 +71893,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (grpc_status) (\n rate(\n label_replace(\n {cluster=~\"$cluster\", job=~\"($namespace)/tempo\", __name__=~\"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_[0-9]+\"},\n \"grpc_status\", \"$1\", \"__name__\", \"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_(.+)\"\n )\n [$__interval:$__interval]\n )\n)\n", + "expr": "sum by (grpc_status) (\n rate(\n label_replace(\n {cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", __name__=~\"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_[0-9]+\"},\n \"grpc_status\", \"$1\", \"__name__\", \"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_(.+)\"\n )\n [$__interval:$__interval]\n )\n)\n", "format": "time_series", "interval": "1m", "legendFormat": "{{grpc_status}}", @@ -70134,14 +72000,14 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_receiver_accepted_spans{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_receiver_accepted_spans{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "accepted", "legendLink": null }, { - "expr": "sum(rate(tempo_receiver_refused_spans{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_receiver_refused_spans{cluster=~\"$cluster\", 
job=~\"($namespace)/distributor\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "refused", @@ -70216,7 +72082,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70225,7 +72091,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70234,7 +72100,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_distributor_push_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_distributor_push_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_distributor_push_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_distributor_push_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70333,7 +72199,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\", route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -70408,7 +72274,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70417,7 +72283,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70426,7 +72292,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70525,7 +72391,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -70600,7 +72466,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70609,7 +72475,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, 
sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70618,7 +72484,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70717,7 +72583,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", 
"legendFormat": "{{status}}", @@ -70792,7 +72658,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70801,7 +72667,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70810,7 +72676,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70909,7 +72775,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by 
(status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -70984,7 +72850,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -70993,7 +72859,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -71002,7 +72868,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by () 
* 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -71101,7 +72967,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -71176,7 +73042,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -71185,7 +73051,7 @@ data: "step": 10 }, { - "expr": 
"histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -71194,7 +73060,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -71261,7 +73127,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/kubernetes-mixin/deploy/alerts.yaml b/monitoring-mixins/kubernetes-mixin/deploy/alerts.yaml index 8327466d..27bfcd6b 100644 --- a/monitoring-mixins/kubernetes-mixin/deploy/alerts.yaml +++ b/monitoring-mixins/kubernetes-mixin/deploy/alerts.yaml @@ -351,7 +351,7 @@ groups: rules: - alert: KubePersistentVolumeFillingUp annotations: - description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free. 
+ description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster }} is only {{ $value | humanizePercentage }} free. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup summary: PersistentVolume is filling up. expr: | @@ -362,16 +362,16 @@ groups: ) < 0.03 and kubelet_volume_stats_used_bytes{job="integrations/kubernetes/kubelet"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 for: 1m labels: severity: critical - alert: KubePersistentVolumeFillingUp annotations: - description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available. + description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup summary: PersistentVolume is filling up. 
expr: | @@ -384,16 +384,16 @@ groups: kubelet_volume_stats_used_bytes{job="integrations/kubernetes/kubelet"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{job="integrations/kubernetes/kubelet"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 for: 1h labels: severity: warning - alert: KubePersistentVolumeInodesFillingUp annotations: - description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage }} free inodes. + description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster }} only has {{ $value | humanizePercentage }} free inodes. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup summary: PersistentVolumeInodes are filling up. 
expr: | @@ -404,16 +404,16 @@ groups: ) < 0.03 and kubelet_volume_stats_inodes_used{job="integrations/kubernetes/kubelet"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 for: 1m labels: severity: critical - alert: KubePersistentVolumeInodesFillingUp annotations: - description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free. + description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup summary: PersistentVolumeInodes are filling up. 
expr: | @@ -426,16 +426,16 @@ groups: kubelet_volume_stats_inodes_used{job="integrations/kubernetes/kubelet"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{job="integrations/kubernetes/kubelet"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 for: 1h labels: severity: warning - alert: KubePersistentVolumeErrors annotations: - description: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}. + description: The persistent volume {{ $labels.persistentvolume }} on Cluster {{ $labels.cluster }} has status {{ $labels.phase }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors summary: PersistentVolume is having issues with provisioning. 
expr: | diff --git a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-cluster.json b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-cluster.json index a8c6a344..34dece27 100644 --- a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-cluster.json +++ b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-cluster.json @@ -52,7 +52,6 @@ "expr": "cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -131,7 +130,6 @@ "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"integrations/kubernetes/kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -210,7 +208,6 @@ "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"integrations/kubernetes/kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -289,7 +286,6 @@ "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"integrations/node_exporter\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -368,7 +364,6 @@ "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"integrations/kubernetes/kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -447,7 +442,6 @@ "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / 
sum(kube_node_status_allocatable{job=\"integrations/kubernetes/kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -536,7 +530,6 @@ { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -766,7 +759,6 @@ "expr": "sum(kube_pod_owner{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -774,7 +766,6 @@ "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -782,7 +773,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -790,7 +780,6 @@ "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -798,7 +787,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -806,7 +794,6 @@ "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" }, @@ -814,7 +801,6 @@ "expr": 
"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "G" } @@ -905,7 +891,6 @@ { "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -1135,7 +1120,6 @@ "expr": "sum(kube_pod_owner{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -1143,7 +1127,6 @@ "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -1151,7 +1134,6 @@ "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -1159,7 +1141,6 @@ "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -1167,7 +1148,6 @@ "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -1175,7 +1155,6 @@ "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by 
(namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" }, @@ -1183,7 +1162,6 @@ "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "G" } @@ -1399,7 +1377,6 @@ "expr": "sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -1407,7 +1384,6 @@ "expr": "sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -1415,7 +1391,6 @@ "expr": "sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -1423,7 +1398,6 @@ "expr": "sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -1431,7 +1405,6 @@ "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -1439,7 +1412,6 @@ "expr": 
"sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -1530,7 +1502,6 @@ { "expr": "sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -1608,7 +1579,6 @@ { "expr": "sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -1698,7 +1668,6 @@ { "expr": "avg(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -1776,7 +1745,6 @@ { "expr": "avg(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -1866,7 +1834,6 @@ { "expr": "sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -1944,7 +1911,6 @@ { "expr": "sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", 
"format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -2034,7 +2000,6 @@ { "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -2112,7 +2077,6 @@ { "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -2203,7 +2167,6 @@ { "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -2281,7 +2244,6 @@ { "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -2500,7 +2462,6 @@ "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", 
device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -2508,7 +2469,6 @@ "expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -2516,7 +2476,6 @@ "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -2524,7 +2483,6 @@ "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -2532,7 +2490,6 @@ "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -2540,7 +2497,6 @@ "expr": 
"sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -2605,7 +2561,7 @@ "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-multicluster.json b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-multicluster.json index 74f6d729..51bf0c29 100644 --- a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-multicluster.json +++ b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-multicluster.json @@ -52,7 +52,6 @@ "expr": "cluster:node_cpu:ratio_rate5m", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -131,7 +130,6 @@ "expr": "sum(kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"cpu\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -210,7 +208,6 @@ "expr": "sum(kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"cpu\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -289,7 +286,6 @@ "expr": "1 - 
sum(:node_memory_MemAvailable_bytes:sum) / sum(node_memory_MemTotal_bytes{job=\"integrations/node_exporter\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -368,7 +364,6 @@ "expr": "sum(kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"memory\"}) / sum(kube_node_status_allocatable{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"memory\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -447,7 +442,6 @@ "expr": "sum(kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"memory\"}) / sum(kube_node_status_allocatable{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"memory\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -536,7 +530,6 @@ { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{cluster}}", "legendLink": null } @@ -736,7 +729,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -744,7 +736,6 @@ "expr": "sum(kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"cpu\"}) by (cluster)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -752,7 +743,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster) / sum(kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"cpu\"}) by (cluster)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -760,7 +750,6 @@ "expr": 
"sum(kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"cpu\"}) by (cluster)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -768,7 +757,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster) / sum(kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"cpu\"}) by (cluster)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" } @@ -859,7 +847,6 @@ { "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", container!=\"\"}) by (cluster)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{cluster}}", "legendLink": null } @@ -1059,7 +1046,6 @@ "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", container!=\"\"}) by (cluster)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -1067,7 +1053,6 @@ "expr": "sum(kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"memory\"}) by (cluster)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -1075,7 +1060,6 @@ "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", container!=\"\"}) by (cluster) / sum(kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"memory\"}) by (cluster)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -1083,7 +1067,6 @@ "expr": "sum(kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"memory\"}) by (cluster)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -1091,7 +1074,6 @@ "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", 
container!=\"\"}) by (cluster) / sum(kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", resource=\"memory\"}) by (cluster)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" } @@ -1156,7 +1138,7 @@ "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-namespace.json b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-namespace.json index e84a2a6d..0bc085f4 100644 --- a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-namespace.json +++ b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-namespace.json @@ -52,7 +52,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -131,7 +130,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -210,7 +208,6 @@ "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", "format": 
"time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -289,7 +286,6 @@ "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -401,21 +397,18 @@ { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - requests", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - limits", "legendLink": null } @@ -615,7 +608,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -623,7 +615,6 @@ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -631,7 +622,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / 
sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -639,7 +629,6 @@ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -647,7 +636,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" } @@ -761,21 +749,18 @@ { "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - requests", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - limits", "legendLink": null } @@ -1020,7 +1005,6 @@ "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -1028,7 +1012,6 @@ "expr": 
"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -1036,7 +1019,6 @@ "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -1044,7 +1026,6 @@ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -1052,7 +1033,6 @@ "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -1060,7 +1040,6 @@ "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" }, @@ -1068,7 +1047,6 @@ "expr": "sum(container_memory_cache{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "G" }, @@ -1076,7 +1054,6 @@ "expr": "sum(container_memory_swap{job=\"integrations/kubernetes/cadvisor\", 
cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "H" } @@ -1292,7 +1269,6 @@ "expr": "sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -1300,7 +1276,6 @@ "expr": "sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -1308,7 +1283,6 @@ "expr": "sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -1316,7 +1290,6 @@ "expr": "sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -1324,7 +1297,6 @@ "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -1332,7 +1304,6 @@ "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -1423,7 +1394,6 @@ { "expr": 
"sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1501,7 +1471,6 @@ { "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1591,7 +1560,6 @@ { "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1669,7 +1637,6 @@ { "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1759,7 +1726,6 @@ { "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1837,7 +1803,6 @@ { "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1928,7 +1893,6 @@ { "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", "format": "time_series", - 
"intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -2006,7 +1970,6 @@ { "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -2225,7 +2188,6 @@ "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -2233,7 +2195,6 @@ "expr": "sum by(pod) (rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -2241,7 +2202,6 @@ "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -2249,7 +2209,6 @@ "expr": "sum 
by(pod) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -2257,7 +2216,6 @@ "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -2265,7 +2223,6 @@ "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -2330,7 +2287,7 @@ "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-node.json b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-node.json index 4ae51fe9..f99927b2 100644 --- a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-node.json +++ b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-node.json @@ -62,14 +62,12 @@ { "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", 
resource=\"cpu\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "max capacity", "legendLink": null }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -269,7 +267,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -277,7 +274,6 @@ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -285,7 +281,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -293,7 +288,6 @@ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -301,7 +295,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" } @@ -404,14 +397,12 @@ { "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})", "format": 
"time_series", - "intervalFactor": 2, "legendFormat": "max capacity", "legendLink": null }, { "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -656,7 +647,6 @@ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -664,7 +654,6 @@ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -672,7 +661,6 @@ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -680,7 +668,6 @@ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -688,7 +675,6 @@ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -696,7 +682,6 @@ "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", 
node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" }, @@ -704,7 +689,6 @@ "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "G" }, @@ -712,7 +696,6 @@ "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "H" } @@ -777,7 +760,7 @@ "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-pod.json b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-pod.json index 0c2ac71a..2421b5d3 100644 --- a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-pod.json +++ b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-pod.json @@ -69,21 +69,18 @@ { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "requests", "legendLink": null }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", "format": "time_series", - "intervalFactor": 2, 
"legendFormat": "limits", "legendLink": null } @@ -173,7 +170,6 @@ { "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"integrations/kubernetes/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"integrations/kubernetes/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null } @@ -382,7 +378,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -390,7 +385,6 @@ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -398,7 +392,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -406,7 +399,6 @@ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -414,7 +406,6 @@ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", 
namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" } @@ -526,21 +517,18 @@ { "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "requests", "legendLink": null }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limits", "legendLink": null } @@ -785,7 +773,6 @@ "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -793,7 +780,6 @@ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -801,7 +787,6 @@ "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) 
by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -809,7 +794,6 @@ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -817,7 +801,6 @@ "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -825,7 +808,6 @@ "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" }, @@ -833,7 +815,6 @@ "expr": "sum(container_memory_cache{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "G" }, @@ -841,7 +822,6 @@ "expr": "sum(container_memory_swap{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "H" } @@ -932,7 +912,6 @@ { "expr": 
"sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1010,7 +989,6 @@ { "expr": "sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1100,7 +1078,6 @@ { "expr": "sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1178,7 +1155,6 @@ { "expr": "sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1268,7 +1244,6 @@ { "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1346,7 +1321,6 @@ { "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1437,14 +1411,12 @@ { "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", 
device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Reads", "legendLink": null }, { "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Writes", "legendLink": null } @@ -1522,14 +1494,12 @@ { "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Reads", "legendLink": null }, { "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Writes", "legendLink": null } @@ -1620,7 +1590,6 @@ { "expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null } @@ -1698,7 +1667,6 @@ { "expr": "sum by(container) 
(rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null } @@ -1917,7 +1885,6 @@ "expr": "sum by(container) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -1925,7 +1892,6 @@ "expr": "sum by(container) (rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\",device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -1933,7 +1899,6 @@ "expr": "sum by(container) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -1941,7 +1906,6 @@ "expr": "sum by(container) 
(rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -1949,7 +1913,6 @@ "expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -1957,7 +1920,6 @@ "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -2022,7 +1984,7 @@ "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-workload.json b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-workload.json index 31945c19..a7437e87 100644 --- a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-workload.json +++ b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-workload.json @@ -50,7 +50,6 @@ { "expr": 
"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -250,7 +249,6 @@ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -258,7 +256,6 @@ "expr": "sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -266,7 +263,6 @@ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n 
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -274,7 +270,6 @@ "expr": "sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -282,7 +277,6 @@ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" } @@ -373,7 +367,6 @@ { "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", 
workload_type=~\"$type\"}\n) by (pod)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -573,7 +566,6 @@ "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -581,7 +573,6 @@ "expr": "sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -589,7 +580,6 @@ "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -597,7 +587,6 @@ "expr": "sum(\n 
kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -605,7 +594,6 @@ "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" } @@ -821,7 +809,6 @@ "expr": "(sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -829,7 +816,6 @@ "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", 
namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -837,7 +823,6 @@ "expr": "(sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -845,7 +830,6 @@ "expr": "(sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -853,7 +837,6 @@ "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -861,7 +844,6 @@ "expr": 
"(sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -952,7 +934,6 @@ { "expr": "(sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1030,7 +1011,6 @@ { "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1120,7 +1100,6 @@ { "expr": "(avg(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ 
-1198,7 +1177,6 @@ { "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1288,7 +1266,6 @@ { "expr": "(sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1366,7 +1343,6 @@ { "expr": "(sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1456,7 +1432,6 @@ { "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, 
"legendFormat": "{{pod}}", "legendLink": null } @@ -1534,7 +1509,6 @@ { "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1598,7 +1572,7 @@ "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-workloads-namespace.json b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-workloads-namespace.json index 173ea7aa..d4b32750 100644 --- a/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-workloads-namespace.json +++ b/monitoring-mixins/kubernetes-mixin/deploy/dashboards_out/k8s-resources-workloads-namespace.json @@ -73,21 +73,18 @@ { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}} - {{workload_type}}", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - requests", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", 
type=\"hard\",resource=\"limits.cpu\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - limits", "legendLink": null } @@ -317,7 +314,6 @@ "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -325,7 +321,6 @@ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -333,7 +328,6 @@ "expr": "sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -341,7 +335,6 @@ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -349,7 +342,6 @@ "expr": "sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -357,7 +349,6 @@ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -471,21 +462,18 @@ { "expr": "sum(\n container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", 
"format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}} - {{workload_type}}", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - requests", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - limits", "legendLink": null } @@ -715,7 +703,6 @@ "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -723,7 +710,6 @@ "expr": "sum(\n container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -731,7 +717,6 @@ "expr": "sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -739,7 +724,6 @@ "expr": "sum(\n container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", 
cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -747,7 +731,6 @@ "expr": "sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -755,7 +738,6 @@ "expr": "sum(\n container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", 
workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -986,7 +968,6 @@ "expr": "(sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -994,7 +975,6 @@ "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -1002,7 +982,6 @@ "expr": "(sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -1010,7 +989,6 @@ "expr": "(sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", "format": 
"table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -1018,7 +996,6 @@ "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -1026,7 +1003,6 @@ "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -1117,7 +1093,6 @@ { "expr": "(sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -1195,7 +1170,6 @@ { "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": 
"time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -1285,7 +1259,6 @@ { "expr": "(avg(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -1363,7 +1336,6 @@ { "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -1453,7 +1425,6 @@ { "expr": "(sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -1531,7 +1502,6 @@ { "expr": "(sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", 
workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -1621,7 +1591,6 @@ { "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -1699,7 +1668,6 @@ { "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -1763,7 +1731,7 @@ "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/kubernetes-mixin/deploy/manifests/k8s-all-in-one.yaml b/monitoring-mixins/kubernetes-mixin/deploy/manifests/k8s-all-in-one.yaml index 958ff4f2..43de036b 100644 --- a/monitoring-mixins/kubernetes-mixin/deploy/manifests/k8s-all-in-one.yaml +++ b/monitoring-mixins/kubernetes-mixin/deploy/manifests/k8s-all-in-one.yaml @@ -4378,7 +4378,6 @@ data: "expr": "cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -4457,7 +4456,6 @@ data: "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / 
sum(kube_node_status_allocatable{job=\"integrations/kubernetes/kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -4536,7 +4534,6 @@ data: "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"integrations/kubernetes/kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -4615,7 +4612,6 @@ data: "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"integrations/node_exporter\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -4694,7 +4690,6 @@ data: "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"integrations/kubernetes/kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -4773,7 +4768,6 @@ data: "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"integrations/kubernetes/kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -4862,7 +4856,6 @@ data: { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -5092,7 +5085,6 @@ data: "expr": "sum(kube_pod_owner{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -5100,7 
+5092,6 @@ data: "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -5108,7 +5099,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -5116,7 +5106,6 @@ data: "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -5124,7 +5113,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -5132,7 +5120,6 @@ data: "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" }, @@ -5140,7 +5127,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "G" } @@ -5231,7 +5217,6 @@ data: { "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -5461,7 +5446,6 @@ data: "expr": 
"sum(kube_pod_owner{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -5469,7 +5453,6 @@ data: "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -5477,7 +5460,6 @@ data: "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -5485,7 +5467,6 @@ data: "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -5493,7 +5474,6 @@ data: "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -5501,7 +5481,6 @@ data: "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" }, @@ -5509,7 +5488,6 @@ data: "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "G" } @@ -5725,7 +5703,6 @@ data: "expr": 
"sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -5733,7 +5710,6 @@ data: "expr": "sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -5741,7 +5717,6 @@ data: "expr": "sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -5749,7 +5724,6 @@ data: "expr": "sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -5757,7 +5731,6 @@ data: "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -5765,7 +5738,6 @@ data: "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -5856,7 +5828,6 @@ data: { "expr": "sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", 
"format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -5934,7 +5905,6 @@ data: { "expr": "sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -6024,7 +5994,6 @@ data: { "expr": "avg(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -6102,7 +6071,6 @@ data: { "expr": "avg(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -6192,7 +6160,6 @@ data: { "expr": "sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -6270,7 +6237,6 @@ data: { "expr": "sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -6360,7 +6326,6 @@ data: { "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -6438,7 +6403,6 @@ 
data: { "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -6529,7 +6493,6 @@ data: { "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -6607,7 +6570,6 @@ data: { "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{namespace}}", "legendLink": null } @@ -6826,7 +6788,6 @@ data: "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -6834,7 +6795,6 @@ data: "expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", 
cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -6842,7 +6802,6 @@ data: "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -6850,7 +6809,6 @@ data: "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -6858,7 +6816,6 @@ data: "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -6866,7 +6823,6 @@ data: "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", 
cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -6931,7 +6887,7 @@ data: "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -7064,7 +7020,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -7143,7 +7098,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -7222,7 +7176,6 @@ data: "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -7301,7 +7254,6 @@ data: "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", "format": "time_series", "instant": true, - 
"intervalFactor": 2, "refId": "A" } ], @@ -7413,21 +7365,18 @@ data: { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - requests", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - limits", "legendLink": null } @@ -7627,7 +7576,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -7635,7 +7583,6 @@ data: "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -7643,7 +7590,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -7651,7 +7597,6 @@ data: "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -7659,7 +7604,6 
@@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" } @@ -7773,21 +7717,18 @@ data: { "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - requests", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - limits", "legendLink": null } @@ -8032,7 +7973,6 @@ data: "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -8040,7 +7980,6 @@ data: "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -8048,7 +7987,6 @@ data: "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / 
sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -8056,7 +7994,6 @@ data: "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -8064,7 +8001,6 @@ data: "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -8072,7 +8008,6 @@ data: "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" }, @@ -8080,7 +8015,6 @@ data: "expr": "sum(container_memory_cache{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "G" }, @@ -8088,7 +8022,6 @@ data: "expr": "sum(container_memory_swap{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "H" } @@ -8304,7 +8237,6 @@ data: "expr": "sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": 
true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -8312,7 +8244,6 @@ data: "expr": "sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -8320,7 +8251,6 @@ data: "expr": "sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -8328,7 +8258,6 @@ data: "expr": "sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -8336,7 +8265,6 @@ data: "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -8344,7 +8272,6 @@ data: "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -8435,7 +8362,6 @@ data: { "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -8513,7 +8439,6 @@ data: { "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", 
namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -8603,7 +8528,6 @@ data: { "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -8681,7 +8605,6 @@ data: { "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -8771,7 +8694,6 @@ data: { "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -8849,7 +8771,6 @@ data: { "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -8940,7 +8861,6 @@ data: { "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -9018,7 +8938,6 @@ data: { "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", 
namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -9237,7 +9156,6 @@ data: "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -9245,7 +9163,6 @@ data: "expr": "sum by(pod) (rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -9253,7 +9170,6 @@ data: "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -9261,7 +9177,6 @@ data: "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", 
namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -9269,7 +9184,6 @@ data: "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -9277,7 +9191,6 @@ data: "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -9342,7 +9255,7 @@ data: "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -9508,14 +9421,12 @@ data: { "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "max capacity", "legendLink": null }, { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -9715,7 +9626,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - 
"intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -9723,7 +9633,6 @@ data: "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -9731,7 +9640,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -9739,7 +9647,6 @@ data: "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -9747,7 +9654,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" } @@ -9850,14 +9756,12 @@ data: { "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "max capacity", "legendLink": null }, { "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -10102,7 +10006,6 @@ data: "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": 
"table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -10110,7 +10013,6 @@ data: "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -10118,7 +10020,6 @@ data: "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -10126,7 +10027,6 @@ data: "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -10134,7 +10034,6 @@ data: "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -10142,7 +10041,6 @@ data: "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" }, @@ -10150,7 +10048,6 @@ data: "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "G" }, @@ -10158,7 +10055,6 @@ data: "expr": 
"sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "H" } @@ -10223,7 +10119,7 @@ data: "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -10396,21 +10292,18 @@ data: { "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null }, { "expr": "sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "requests", "legendLink": null }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limits", "legendLink": null } @@ -10500,7 +10393,6 @@ data: { "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"integrations/kubernetes/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"integrations/kubernetes/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null } @@ -10709,7 +10601,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", 
pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -10717,7 +10608,6 @@ data: "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -10725,7 +10615,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -10733,7 +10622,6 @@ data: "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -10741,7 +10629,6 @@ data: "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" } @@ -10853,21 +10740,18 @@ data: { "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null }, { "expr": "sum(\n 
kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "requests", "legendLink": null }, { "expr": "sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limits", "legendLink": null } @@ -11112,7 +10996,6 @@ data: "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -11120,7 +11003,6 @@ data: "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -11128,7 +11010,6 @@ data: "expr": "sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -11136,7 +11017,6 @@ data: "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -11144,7 +11024,6 @@ data: "expr": 
"sum(container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -11152,7 +11031,6 @@ data: "expr": "sum(container_memory_rss{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" }, @@ -11160,7 +11038,6 @@ data: "expr": "sum(container_memory_cache{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "G" }, @@ -11168,7 +11045,6 @@ data: "expr": "sum(container_memory_swap{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "H" } @@ -11259,7 +11135,6 @@ data: { "expr": "sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -11337,7 +11212,6 @@ data: { "expr": "sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", - 
"intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -11427,7 +11301,6 @@ data: { "expr": "sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -11505,7 +11378,6 @@ data: { "expr": "sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -11595,7 +11467,6 @@ data: { "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -11673,7 +11544,6 @@ data: { "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -11764,14 +11634,12 @@ data: { "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Reads", "legendLink": null }, { "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", 
cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Writes", "legendLink": null } @@ -11849,14 +11717,12 @@ data: { "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Reads", "legendLink": null }, { "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Writes", "legendLink": null } @@ -11947,7 +11813,6 @@ data: { "expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null } @@ -12025,7 +11890,6 @@ data: { "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{container}}", "legendLink": null } @@ 
-12244,7 +12108,6 @@ data: "expr": "sum by(container) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -12252,7 +12115,6 @@ data: "expr": "sum by(container) (rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\",device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -12260,7 +12122,6 @@ data: "expr": "sum by(container) (rate(container_fs_reads_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -12268,7 +12129,6 @@ data: "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -12276,7 +12136,6 @@ data: "expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", 
device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -12284,7 +12143,6 @@ data: "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"integrations/kubernetes/cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -12349,7 +12207,7 @@ data: "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -12526,7 +12384,6 @@ data: { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -12726,7 +12583,6 @@ data: "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", 
"instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -12734,7 +12590,6 @@ data: "expr": "sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -12742,7 +12597,6 @@ data: "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -12750,7 +12604,6 @@ data: "expr": "sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -12758,7 +12611,6 @@ data: 
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" } @@ -12849,7 +12701,6 @@ data: { "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -13049,7 +12900,6 @@ data: "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -13057,7 +12907,6 @@ data: "expr": "sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * 
on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -13065,7 +12914,6 @@ data: "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -13073,7 +12921,6 @@ data: "expr": "sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -13081,7 +12928,6 @@ data: "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", 
workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" } @@ -13297,7 +13143,6 @@ data: "expr": "(sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -13305,7 +13150,6 @@ data: "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -13313,7 +13157,6 @@ data: "expr": "(sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "table", 
"instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -13321,7 +13164,6 @@ data: "expr": "(sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -13329,7 +13171,6 @@ data: "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -13337,7 +13178,6 @@ data: "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -13428,7 +13268,6 @@ data: { "expr": "(sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", 
workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -13506,7 +13345,6 @@ data: { "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -13596,7 +13434,6 @@ data: { "expr": "(avg(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -13674,7 +13511,6 @@ data: { "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -13764,7 +13600,6 @@ data: { "expr": "(sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -13842,7 +13677,6 @@ data: { "expr": "(sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -13932,7 +13766,6 @@ data: { "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -14010,7 +13843,6 @@ data: { "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -14074,7 +13906,7 @@ data: "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ 
-14297,21 +14129,18 @@ data: { "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}} - {{workload_type}}", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - requests", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - limits", "legendLink": null } @@ -14541,7 +14370,6 @@ data: "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -14549,7 +14377,6 @@ data: "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -14557,7 +14384,6 @@ data: "expr": "sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -14565,7 +14391,6 @@ data: "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -14573,7 +14398,6 @@ data: "expr": "sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -14581,7 +14405,6 @@ data: "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n 
kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -14695,21 +14518,18 @@ data: { "expr": "sum(\n container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}} - {{workload_type}}", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - requests", "legendLink": null }, { "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "quota - limits", "legendLink": null } @@ -14939,7 +14759,6 @@ data: "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -14947,7 +14766,6 @@ data: "expr": "sum(\n container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n 
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -14955,7 +14773,6 @@ data: "expr": "sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -14963,7 +14780,6 @@ data: "expr": "sum(\n container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -14971,7 +14787,6 @@ data: "expr": "sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -14979,7 +14794,6 @@ data: "expr": "sum(\n container_memory_working_set_bytes{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"integrations/kubernetes/kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -15210,7 +15024,6 @@ data: "expr": "(sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "A" }, @@ -15218,7 +15031,6 @@ data: "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", 
workload_type=~\"$type\"}) by (workload))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "B" }, @@ -15226,7 +15038,6 @@ data: "expr": "(sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "C" }, @@ -15234,7 +15045,6 @@ data: "expr": "(sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "D" }, @@ -15242,7 +15052,6 @@ data: "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "E" }, @@ -15250,7 +15059,6 @@ data: "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", 
workload_type=~\"$type\"}) by (workload))\n", "format": "table", "instant": true, - "intervalFactor": 2, "legendFormat": "", "refId": "F" } @@ -15341,7 +15149,6 @@ data: { "expr": "(sum(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -15419,7 +15226,6 @@ data: { "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -15509,7 +15315,6 @@ data: { "expr": "(avg(irate(container_network_receive_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -15587,7 +15392,6 @@ data: { "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -15677,7 +15481,6 @@ data: { "expr": "(sum(irate(container_network_receive_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -15755,7 +15558,6 @@ data: { "expr": "(sum(irate(container_network_transmit_packets_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -15845,7 +15647,6 @@ data: { "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -15923,7 +15724,6 @@ data: { "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"integrations/kubernetes/cadvisor\", cluster=\"$cluster\", 
namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{workload}}", "legendLink": null } @@ -15987,7 +15787,7 @@ data: "value": "Metrics" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/kubernetes-mixin/deploy/rules.yaml b/monitoring-mixins/kubernetes-mixin/deploy/rules.yaml index a9c61f0a..a10598b3 100644 --- a/monitoring-mixins/kubernetes-mixin/deploy/rules.yaml +++ b/monitoring-mixins/kubernetes-mixin/deploy/rules.yaml @@ -658,8 +658,8 @@ groups: - expr: | count by (cluster, node) ( node_cpu_seconds_total{mode="idle",job="integrations/node_exporter"} - * on (namespace, pod) group_left(node) - topk by(namespace, pod) (1, node_namespace_pod:kube_pod_info:) + * on (cluster, namespace, pod) group_left(node) + topk by(cluster, namespace, pod) (1, node_namespace_pod:kube_pod_info:) ) record: node:node_num_cpu:sum - expr: | diff --git a/monitoring-mixins/kubernetes-mixin/jsonnetfile.lock.json b/monitoring-mixins/kubernetes-mixin/jsonnetfile.lock.json index b72e1ae2..3deb2048 100644 --- a/monitoring-mixins/kubernetes-mixin/jsonnetfile.lock.json +++ b/monitoring-mixins/kubernetes-mixin/jsonnetfile.lock.json @@ -18,8 +18,8 @@ "subdir": "grafana-builder" } }, - "version": "1fa8fdbf030cecf0c0900cda2954585f08a88b60", - "sum": "aCN8uCrs2PDLR0SzRAuwZ6C5hiKt1KggCUCT7/F8yZ0=" + "version": "02db06f540086fa3f67d487bd01e1b314853fb8f", + "sum": "B49EzIY2WZsFxNMJcgRxE/gcZ9ltnS8pkOOV6Q5qioc=" }, { "source": { @@ -28,8 +28,8 @@ "subdir": "" } }, - "version": "bcf8426b9c5ee85fdf8a6d9c62708f94e0367b21", - "sum": "1pCIS5kwa2b5JniHr3WV5wwiau29gM0fNQmqO2mXiCQ=" + "version": 
"c72ac0392998343d53bd73343467f8bf2aa4e333", + "sum": "VWD9c9I1Q2252HgV/Ao/4mvdATy5GAyZASO30ituyJU=" } ], "legacyImports": false diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-chunks.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-chunks.json index 60aff469..475406fe 100644 --- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-chunks.json +++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-chunks.json @@ -60,7 +60,6 @@ { "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "series", "legendLink": null } @@ -135,7 +134,6 @@ { "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}) / sum(loki_ingester_memory_streams{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "chunks", "legendLink": null } @@ -222,21 +220,18 @@ { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_ingester_chunk_utilization_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -311,21 +306,18 @@ { "expr": "histogram_quantile(0.99, 
sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_ingester_chunk_age_seconds_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -412,21 +404,18 @@ { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_ingester_chunk_entries_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -501,7 +490,6 @@ { "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", 
job=~\"$namespace/ingester.*\"}[5m]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Index Entries", "legendLink": null } @@ -586,9 +574,8 @@ "steppedLine": false, "targets": [ { - "expr": "cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}", + "expr": "loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -636,6 +623,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -670,9 +658,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -759,7 +746,6 @@ { "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -834,7 +820,6 @@ { "expr": "sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", 
job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{reason}}", "legendLink": null } @@ -1045,21 +1030,18 @@ { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p99", "legendLink": null }, { "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p90", "legendLink": null }, { "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p50", "legendLink": null } @@ -1146,21 +1128,18 @@ { "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p50", "legendLink": null }, { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p99", "legendLink": null }, { "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "avg", "legendLink": null } @@ -1223,7 +1202,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git 
a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-deletion.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-deletion.json index 4269345c..df0fe0eb 100644 --- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-deletion.json +++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-deletion.json @@ -62,7 +62,6 @@ "expr": "sum(loki_compactor_pending_delete_requests_count{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -138,7 +137,6 @@ "expr": "max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -224,7 +222,6 @@ { "expr": "(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on() vector(0)) - on () (loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on () vector(0))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "in progress", "legendLink": null } @@ -299,7 +296,6 @@ { "expr": "sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "received", "legendLink": null } @@ -374,7 +370,6 @@ { "expr": "sum(increase(loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "processed", "legendLink": null } @@ -461,7 +456,6 @@ { "expr": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -536,7 +530,6 @@ { "expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\", container=\"compactor\"} / 1024 / 1024 ", "format": "time_series", - "intervalFactor": 2, "legendFormat": " {{pod}} ", "legendLink": null } @@ -611,7 +604,6 @@ { "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -698,7 +690,6 @@ { "expr": "sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "failures", "legendLink": null } @@ -773,7 +764,6 @@ { "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/compactor\"}[$__rate_interval])) by (user)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{user}}", "legendLink": null } @@ -874,7 +864,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-logs.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-logs.json index ea9b78d2..160df2ca 100644 --- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-logs.json +++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-logs.json @@ -77,7 +77,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -164,7 +164,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -250,7 +250,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -336,7 +336,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": 
"time", @@ -422,7 +422,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -508,7 +508,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -595,7 +595,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -682,7 +682,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -787,7 +787,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -861,7 +861,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-mixin-recording-rules.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-mixin-recording-rules.json index b917b566..735956fd 100644 --- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-mixin-recording-rules.json +++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-mixin-recording-rules.json @@ -600,7 +600,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-operational.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-operational.json index 61be4940..830fc60b 100644 --- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-operational.json +++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-operational.json @@ -102,7 +102,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -198,7 +198,7 @@ "sort": 
0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -229,6 +229,102 @@ "alignLevel": null } }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 1 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(5, sum by (name,level) (rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\"}[$__interval])) - \nsum by (name,level) (rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\"}[$__interval] offset 1h)))", + "legendFormat": "{{name}}-{{level}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Bad Words", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": { }, "bars": false, @@ -293,7 
+389,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -389,7 +485,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -485,7 +581,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -591,7 +687,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -697,7 +793,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -794,7 +890,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -903,7 +999,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1000,7 +1096,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1109,7 +1205,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1215,7 +1311,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1312,7 +1408,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1420,7 +1516,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1517,7 +1613,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1618,7 +1714,7 @@ "sort": 0, "value_type": "individual" }, - "type": 
"graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1815,7 +1911,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1907,7 +2003,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1999,7 +2095,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2116,7 +2212,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2205,7 +2301,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2294,7 +2390,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2383,7 +2479,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2488,7 +2584,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2580,7 +2676,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2672,7 +2768,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2789,7 +2885,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2896,7 +2992,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2987,7 +3083,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { 
"buckets": null, "mode": "time", @@ -3103,7 +3199,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3256,7 +3352,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3428,7 +3524,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3520,7 +3616,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3612,7 +3708,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3729,7 +3825,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3849,7 +3945,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3941,7 +4037,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4061,7 +4157,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4153,7 +4249,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4267,7 +4363,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4370,7 +4466,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4473,7 +4569,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4571,7 
+4667,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4661,7 +4757,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4751,7 +4847,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4841,7 +4937,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4931,7 +5027,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5051,7 +5147,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5143,7 +5239,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5231,7 +5327,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5245,7 +5341,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5317,7 +5413,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5331,7 +5427,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5403,7 +5499,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(cortex_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5417,7 +5513,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5489,7 +5585,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5503,7 +5599,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5575,17 +5671,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".5", "refId": "C" } @@ -5600,7 +5696,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5675,19 +5771,19 @@ "steppedLine": false, "targets": [ { - 
"expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" @@ -5703,7 +5799,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5778,7 +5874,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" @@ -5794,7 +5890,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5913,7 +6009,7 @@ "sort": 2, "value_type": "individual" }, - 
"type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6004,7 +6100,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6123,7 +6219,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6214,7 +6310,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6333,7 +6429,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6424,7 +6520,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6476,7 +6572,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads-resources.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads-resources.json index 947b858e..a2ba72ef 100644 --- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads-resources.json +++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads-resources.json @@ -71,21 +71,18 @@ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", 
container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -169,21 +166,18 @@ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -256,7 +250,6 @@ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -352,21 +345,18 @@ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})", 
"format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -450,21 +440,18 @@ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -537,7 +524,6 @@ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-scheduler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -634,21 +620,18 @@ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -733,21 +716,18 @@ { "expr": "max by(pod) 
(container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -821,7 +801,6 @@ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/querier\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -895,7 +874,6 @@ { "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -971,7 +949,6 @@ { "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -1047,7 +1024,6 @@ { "expr": "max by(persistentvolumeclaim) 
(kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"querier.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null } @@ -1147,21 +1123,18 @@ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -1246,21 +1219,18 @@ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"} > 0)", "format": "time_series", - "intervalFactor": 2, 
"legendFormat": "limit", "legendLink": null } @@ -1334,7 +1304,6 @@ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1408,7 +1377,6 @@ { "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -1484,7 +1452,6 @@ { "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -1560,7 +1527,6 @@ { "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"index-gateway.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null } @@ -1659,21 +1625,18 @@ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", "format": "time_series", - 
"intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -1757,21 +1720,18 @@ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -1844,7 +1804,6 @@ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.+\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1928,9 +1887,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", + "expr": "sum by(pod) (loki_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"}) or sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", "format": 
"time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -2017,21 +1975,18 @@ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -2116,21 +2071,18 @@ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -2204,7 +2156,6 @@ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -2266,7 +2217,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", 
"options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads.json index cc785b6b..9053f964 100644 --- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads.json +++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-reads.json @@ -33,6 +33,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -67,9 +68,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -144,7 +144,6 @@ { "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", 
route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 99th percentile", "refId": "A", "step": 10 @@ -152,7 +151,6 @@ { "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 50th percentile", "refId": "B", "step": 10 @@ -160,7 +158,6 @@ { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", "refId": "C", "step": 10 @@ -209,13 +206,16 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" 
+ } + }, + "unit": "s" } }, "fill": 1, @@ -306,6 +306,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -340,9 +341,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -417,7 +417,6 @@ { "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 99th percentile", "refId": "A", "step": 10 @@ -425,7 +424,6 @@ { "expr": "histogram_quantile(0.50, sum by (le,route) 
(cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 50th percentile", "refId": "B", "step": 10 @@ -433,7 +431,6 @@ { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", "refId": "C", "step": 10 @@ -482,13 +479,16 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -579,6 +579,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -613,9 +614,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", 
route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -690,7 +690,6 @@ { "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 99th percentile", "refId": "A", "step": 10 @@ -698,7 +697,6 @@ { "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 50th percentile", "refId": "B", "step": 10 @@ -706,7 +704,6 @@ { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", 
route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", "refId": "C", "step": 10 @@ -755,13 +752,16 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -852,6 +852,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -886,9 +887,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", 
\"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -963,7 +963,6 @@ { "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 99th percentile", "refId": "A", "step": 10 @@ -971,7 +970,6 @@ { "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 50th percentile", "refId": "B", "step": 10 @@ -979,7 +977,6 @@ { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", "refId": "C", "step": 10 @@ -1028,13 +1025,16 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": 
"normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -1125,6 +1125,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1159,9 +1160,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -1236,21 +1236,18 @@ { "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) * 1e3 / 
sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -1298,13 +1295,16 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -1395,6 +1395,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1429,9 +1430,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -1506,21 +1506,18 @@ { "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": 
"histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -1568,13 +1565,16 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -1668,7 +1668,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-retention.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-retention.json index eb729c55..e6d4a71a 100644 --- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-retention.json +++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-retention.json @@ -71,21 +71,18 @@ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": 
"min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -169,21 +166,18 @@ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -256,7 +250,6 @@ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -368,7 +361,7 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ @@ -382,7 +375,7 @@ "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Last Compact and Mark Operation Success", + "title": "Last Compact Tables Operation Success", "tooltip": { "shared": true, "sort": 2, @@ -442,14 +435,13 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + 
"span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", "format": "time_series", - "intervalFactor": 2, "legendFormat": "duration", "legendLink": null } @@ -457,7 +449,7 @@ "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Compact and Mark Operations Duration", + "title": "Compact Tables Operations Duration", "tooltip": { "shared": true, "sort": 2, @@ -489,7 +481,19 @@ "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Compaction", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { }, "bars": false, @@ -517,14 +521,87 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(loki_compactor_skipped_compacting_locked_table_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__range]))", + "format": "time_series", + "legendFormat": "{{table_name}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Number of times Tables were skipped during Compaction", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + 
"total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{success}}", "legendLink": null } @@ -532,7 +609,275 @@ "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Compact and Mark Operations Per Status", + "title": "Compact Tables Operations Per Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "custom": { }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "dateTimeFromNow" + } + }, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as 
zero", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { }, + "textMode": "auto" + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "loki_compactor_apply_retention_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Last Mark Operation Success", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "loki_compactor_apply_retention_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", + "format": "time_series", + "legendFormat": "duration", + "legendLink": null + } + ], + "thresholds": [ 
], + "timeFrom": null, + "timeShift": null, + "title": "Mark Operations Duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status)(rate(loki_compactor_apply_retention_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{success}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Mark Operations Per Status", "tooltip": { "shared": true, "sort": 2, @@ -570,7 +915,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Compact and Mark", + "title": "Retention", "titleSize": "h6" }, { @@ -584,7 +929,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 7, + "id": 11, "legend": { "avg": false, "current": false, @@ -611,7 +956,6 @@ { "expr": "count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": 
"{{action}}", "legendLink": null } @@ -659,7 +1003,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 8, + "id": 12, "legend": { "avg": false, "current": false, @@ -686,7 +1030,6 @@ { "expr": "count by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\" , action=~\"modified|deleted\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{table}}-{{action}}", "legendLink": null } @@ -734,7 +1077,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 9, + "id": 13, "legend": { "avg": false, "current": false, @@ -761,7 +1104,6 @@ { "expr": "sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >0", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{table}}", "legendLink": null } @@ -822,7 +1164,7 @@ "datasource": "$datasource", "fill": 1, "format": "short", - "id": 10, + "id": 14, "legend": { "avg": false, "current": false, @@ -850,7 +1192,6 @@ "expr": "sum (increase(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -897,7 +1238,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 11, + "id": 15, "legend": { "avg": false, "current": false, @@ -924,21 +1265,18 @@ { "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": 
"time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -999,7 +1337,7 @@ "datasource": "$datasource", "fill": 1, "format": "short", - "id": 12, + "id": 16, "legend": { "avg": false, "current": false, @@ -1027,7 +1365,6 @@ "expr": "sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -1074,7 +1411,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 13, + "id": 17, "legend": { "avg": false, "current": false, @@ -1101,21 +1438,18 @@ { "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", 
namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -1175,7 +1509,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 14, + "id": 18, "legend": { "avg": false, "current": false, @@ -1202,7 +1536,6 @@ { "expr": "time() - (loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time{cluster=~\"$cluster\", namespace=~\"$namespace\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "lag", "legendLink": null } @@ -1250,7 +1583,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 15, + "id": 19, "legend": { "avg": false, "current": false, @@ -1277,7 +1610,6 @@ { "expr": "sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "count", "legendLink": null } @@ -1325,7 +1657,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 16, + "id": 20, "legend": { "avg": false, "current": false, @@ -1352,7 +1684,6 @@ { "expr": "sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "legendLink": null } @@ -1407,7 +1738,7 @@ "panels": [ { "datasource": "$loki_datasource", - "id": 17, + "id": 21, "span": 12, "targets": [ { @@ -1440,7 +1771,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-writes-resources.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-writes-resources.json index 4701dc0b..38aff1c0 100644 --- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-writes-resources.json +++ 
b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-writes-resources.json @@ -71,21 +71,18 @@ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -169,21 +166,18 @@ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -256,7 +250,6 @@ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -342,7 +335,6 @@ { "expr": "sum by(pod) 
(loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -427,21 +419,18 @@ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -526,21 +515,18 @@ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -614,7 +600,6 @@ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -688,7 
+673,6 @@ { "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -764,7 +748,6 @@ { "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -840,7 +823,6 @@ { "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"ingester.*.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null } @@ -904,7 +886,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-writes.json b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-writes.json index 7226214e..c36d91b6 100644 --- a/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-writes.json +++ b/monitoring-mixins/loki-mixin/deploy/dashboards_out/loki-writes.json @@ -33,6 +33,7 @@ "3xx": "#6ED0E0", 
"4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -67,9 +68,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -144,7 +144,6 @@ { "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th percentile", "refId": "A", "step": 10 @@ -152,7 +151,6 @@ { "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th percentile", "refId": "B", "step": 10 @@ -160,7 +158,6 @@ { "expr": "1e3 * sum(cluster_job:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}) / sum(cluster_job:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C", "step": 10 @@ -210,6 +207,166 @@ 
"title": "Distributor", "titleSize": "h6" }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval])) / sum(rate(loki_distributor_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "bytes", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per Total Received Bytes", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": 
false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (tenant) (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval])) / ignoring(tenant) group_left sum(rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{tenant}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per Tenant", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": 1, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor - Structured Metadata", + "titleSize": "h6" + }, { "collapse": false, "height": "250px", @@ -221,6 +378,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -230,7 +388,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 5, "legend": { "avg": false, "current": false, @@ -255,9 +413,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + 
"expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -305,7 +462,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "id": 6, "legend": { "avg": false, "current": false, @@ -332,7 +489,6 @@ { "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th percentile", "refId": "A", "step": 10 @@ -340,7 +496,6 @@ { "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th percentile", "refId": "B", "step": 10 @@ -348,7 +503,6 @@ { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C", "step": 10 @@ -409,6 +563,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -418,7 +573,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 5, + "id": 7, "legend": { 
"avg": false, "current": false, @@ -443,9 +598,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -493,7 +647,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 6, + "id": 8, "legend": { "avg": false, "current": false, @@ -520,7 +674,6 @@ { "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th percentile", "refId": "A", "step": 10 @@ -528,7 +681,6 @@ { "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th percentile", "refId": "B", "step": 10 @@ -536,7 +688,6 @@ { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", 
route=\"/logproto.Pusher/Push\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C", "step": 10 @@ -597,6 +748,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -606,7 +758,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 7, + "id": 9, "legend": { "avg": false, "current": false, @@ -631,9 +783,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -681,7 +832,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 8, + "id": 10, "legend": { "avg": false, "current": false, @@ -708,21 +859,18 @@ { "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th 
Percentile", "refId": "B" }, { "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -782,6 +930,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -791,7 +940,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 9, + "id": 11, "legend": { "avg": false, "current": false, @@ -816,9 +965,8 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -866,7 +1014,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 10, + "id": 12, "legend": { "avg": false, "current": false, @@ -893,21 +1041,18 @@ { "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": 
"99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -970,7 +1115,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/loki-mixin/deploy/manifests/k8s-all-in-one.yaml b/monitoring-mixins/loki-mixin/deploy/manifests/k8s-all-in-one.yaml index e04f2477..1d53858c 100644 --- a/monitoring-mixins/loki-mixin/deploy/manifests/k8s-all-in-one.yaml +++ b/monitoring-mixins/loki-mixin/deploy/manifests/k8s-all-in-one.yaml @@ -63,7 +63,6 @@ data: { "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "series", "legendLink": null } @@ -138,7 +137,6 @@ data: { "expr": "sum(loki_ingester_memory_chunks{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}) / sum(loki_ingester_memory_streams{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "chunks", "legendLink": null } @@ -225,21 +223,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": 
"time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_ingester_chunk_utilization_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -314,21 +309,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_ingester_chunk_age_seconds_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -415,21 +407,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, 
sum(rate(loki_ingester_chunk_entries_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_ingester_chunk_entries_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -504,7 +493,6 @@ data: { "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Index Entries", "legendLink": null } @@ -589,9 +577,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}", + "expr": "loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -639,6 +626,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -673,9 +661,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n 
label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -762,7 +749,6 @@ data: { "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -837,7 +823,6 @@ data: { "expr": "sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{reason}}", "legendLink": null } @@ -1048,21 +1033,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p99", "legendLink": null }, { "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p90", "legendLink": null }, { "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[1m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p50", "legendLink": null } @@ -1149,21 +1131,18 @@ data: { "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))", "format": 
"time_series", - "intervalFactor": 2, "legendFormat": "p50", "legendLink": null }, { "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) by (le))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "p99", "legendLink": null }, { "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}[5m]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "avg", "legendLink": null } @@ -1226,7 +1205,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -1392,7 +1371,6 @@ data: "expr": "sum(loki_compactor_pending_delete_requests_count{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -1468,7 +1446,6 @@ data: "expr": "max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -1554,7 +1531,6 @@ data: { "expr": "(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on() vector(0)) - on () (loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"} or on () vector(0))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "in progress", "legendLink": null } @@ -1629,7 +1605,6 @@ data: { "expr": "sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "received", "legendLink": null } @@ -1704,7 +1679,6 @@ data: { "expr": 
"sum(increase(loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "processed", "legendLink": null } @@ -1791,7 +1765,6 @@ data: { "expr": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -1866,7 +1839,6 @@ data: { "expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / 1024 / 1024 ", "format": "time_series", - "intervalFactor": 2, "legendFormat": " {{pod}} ", "legendLink": null } @@ -1941,7 +1913,6 @@ data: { "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -2028,7 +1999,6 @@ data: { "expr": "sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "failures", "legendLink": null } @@ -2103,7 +2073,6 @@ data: { "expr": "sum(rate(loki_compactor_deleted_lines{cluster=~\"$cluster\",job=~\"$namespace/compactor\"}[$__rate_interval])) by (user)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{user}}", "legendLink": null } @@ -2204,7 +2173,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -2385,7 +2354,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2472,7 +2441,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { 
"buckets": null, "mode": "time", @@ -2558,7 +2527,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2644,7 +2613,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2730,7 +2699,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2816,7 +2785,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2903,7 +2872,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2990,7 +2959,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3095,7 +3064,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3169,7 +3138,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -3992,7 +3961,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -4229,7 +4198,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4325,7 +4294,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4356,6 +4325,102 @@ data: "alignLevel": null } }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": 
{ } + }, + "overrides": [ ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 1 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(5, sum by (name,level) (rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\"}[$__interval])) - \nsum by (name,level) (rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\"}[$__interval] offset 1h)))", + "legendFormat": "{{name}}-{{level}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Bad Words", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": { }, "bars": false, @@ -4420,7 +4485,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4516,7 +4581,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4612,7 +4677,7 @@ 
data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4718,7 +4783,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4824,7 +4889,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4921,7 +4986,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5030,7 +5095,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5127,7 +5192,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5236,7 +5301,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5342,7 +5407,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5439,7 +5504,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5547,7 +5612,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5644,7 +5709,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5745,7 +5810,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5942,7 +6007,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": 
"time", @@ -6034,7 +6099,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6126,7 +6191,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6243,7 +6308,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6332,7 +6397,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6421,7 +6486,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6510,7 +6575,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6615,7 +6680,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6707,7 +6772,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6799,7 +6864,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6916,7 +6981,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -7023,7 +7088,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -7114,7 +7179,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -7230,7 +7295,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", 
"xaxis": { "buckets": null, "mode": "time", @@ -7383,7 +7448,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -7555,7 +7620,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -7647,7 +7712,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -7739,7 +7804,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -7856,7 +7921,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -7976,7 +8041,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8068,7 +8133,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8188,7 +8253,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8280,7 +8345,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8394,7 +8459,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8497,7 +8562,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8600,7 +8665,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8698,7 +8763,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": 
"graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8788,7 +8853,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8878,7 +8943,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -8968,7 +9033,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9058,7 +9123,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9178,7 +9243,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9270,7 +9335,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9358,7 +9423,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -9372,7 +9437,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9444,7 +9509,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -9458,7 +9523,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9530,7 +9595,7 @@ data: "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(cortex_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -9544,7 +9609,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9616,7 +9681,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -9630,7 +9695,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9702,17 +9767,17 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".5", "refId": "C" } @@ -9727,7 +9792,7 @@ data: "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9802,19 +9867,19 @@ data: 
"steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" @@ -9830,7 +9895,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -9905,7 +9970,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" @@ -9921,7 +9986,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10040,7 
+10105,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10131,7 +10196,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10250,7 +10315,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10341,7 +10406,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10460,7 +10525,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10551,7 +10616,7 @@ data: "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -10603,7 +10668,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -10788,21 +10853,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ 
-10886,21 +10948,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-frontend\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -10973,7 +11032,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -11069,21 +11127,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -11167,21 +11222,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\", container=~\"query-scheduler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"query-scheduler\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -11254,7 +11306,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-scheduler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -11351,21 +11402,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -11450,21 +11498,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": 
null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -11538,7 +11583,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/querier\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -11612,7 +11656,6 @@ data: { "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -11688,7 +11731,6 @@ data: { "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -11764,7 +11806,6 @@ data: { "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) 
(kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"querier.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null } @@ -11864,21 +11905,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -11963,21 +12001,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -12051,7 +12086,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\"})", 
"format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -12125,7 +12159,6 @@ data: { "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -12201,7 +12234,6 @@ data: { "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -12277,7 +12309,6 @@ data: { "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"index-gateway.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null } @@ -12376,21 +12407,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -12474,21 +12502,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -12561,7 +12586,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.+\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -12645,9 +12669,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", + "expr": "sum by(pod) (loki_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"}) or sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -12734,21 +12757,18 @@ data: { "expr": 
"sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -12833,21 +12853,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -12921,7 +12938,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -12983,7 +12999,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -13120,6 +13136,7 @@ data: "3xx": "#6ED0E0", "4xx": 
"#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -13154,9 +13171,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -13231,7 +13247,6 @@ data: { "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 99th percentile", "refId": "A", "step": 10 @@ -13239,7 +13254,6 @@ data: { "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 50th percentile", "refId": "B", "step": 10 @@ -13247,7 +13261,6 @@ data: { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", "refId": "C", "step": 10 @@ -13296,13 +13309,16 @@ data: "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -13393,6 +13409,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -13427,9 +13444,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", 
route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -13504,7 +13520,6 @@ data: { "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 99th percentile", "refId": "A", "step": 10 @@ -13512,7 +13527,6 @@ data: { "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 50th percentile", "refId": "B", "step": 10 @@ -13520,7 +13534,6 @@ 
data: { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", "refId": "C", "step": 10 @@ -13569,13 +13582,16 @@ data: "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -13666,6 +13682,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -13700,9 +13717,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", 
route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -13777,7 +13793,6 @@ data: { "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 99th percentile", "refId": "A", "step": 10 @@ -13785,7 +13800,6 @@ data: { "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 50th percentile", "refId": "B", "step": 10 @@ -13793,7 +13807,6 @@ data: { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, 
"legendFormat": "{{ route }} Average", "refId": "C", "step": 10 @@ -13842,13 +13855,16 @@ data: "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -13939,6 +13955,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -13973,9 +13990,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -14050,7 +14066,6 @@ data: { "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", 
route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 99th percentile", "refId": "A", "step": 10 @@ -14058,7 +14073,6 @@ data: { "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} 50th percentile", "refId": "B", "step": 10 @@ -14066,7 +14080,6 @@ data: { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}) by (route) ", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{ route }} Average", "refId": "C", "step": 10 @@ -14115,13 +14128,16 @@ data: "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -14212,6 +14228,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", 
"error": "#E24D42", "success": "#7EB26D" @@ -14246,9 +14263,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -14323,21 +14339,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/querier\", operation!=\"index_chunk\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -14385,13 +14398,16 @@ data: "dashes": false, "datasource": "$datasource", 
"fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -14482,6 +14498,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -14516,9 +14533,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -14593,21 +14609,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th 
Percentile", "refId": "B" }, { "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/(querier|index-gateway)\", operation=\"Shipper.Query\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -14655,13 +14668,16 @@ data: "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" - } + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "unit": "s" } }, "fill": 1, @@ -14755,7 +14771,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -14930,21 +14946,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -15028,21 +15041,18 @@ data: { "expr": "max by(pod) 
(container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -15115,7 +15125,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -15159,7 +15168,361 @@ data: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Resource Usage", + "title": "Resource Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "custom": { }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "dateTimeFromNow" + } + }, + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": 
{ }, + "textMode": "auto" + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Last Compact Tables Operation Success", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", + "format": "time_series", + "legendFormat": "duration", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Compact Tables Operations Duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + 
"xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Compaction", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(loki_compactor_skipped_compacting_locked_table_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__range]))", + "format": "time_series", + "legendFormat": "{{table_name}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Number of times Tables were skipped during Compaction", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{success}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Compact Tables Operations Per Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", "titleSize": "h6" }, { @@ -15192,7 +15555,7 @@ data: } }, "fill": 1, - "id": 4, + "id": 8, "legend": { "avg": false, "current": false, @@ -15232,7 +15595,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3", + "expr": "loki_compactor_apply_retention_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3", "format": "time_series", "instant": true, "refId": "A" @@ -15241,7 +15604,7 @@ data: "thresholds": [ ], "timeFrom": null, 
"timeShift": null, - "title": "Last Compact and Mark Operation Success", + "title": "Last Mark Operation Success", "tooltip": { "shared": true, "sort": 2, @@ -15281,7 +15644,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 5, + "id": 9, "legend": { "avg": false, "current": false, @@ -15306,9 +15669,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", + "expr": "loki_compactor_apply_retention_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", "format": "time_series", - "intervalFactor": 2, "legendFormat": "duration", "legendLink": null } @@ -15316,7 +15678,7 @@ data: "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Compact and Mark Operations Duration", + "title": "Mark Operations Duration", "tooltip": { "shared": true, "sort": 2, @@ -15356,7 +15718,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 6, + "id": 10, "legend": { "avg": false, "current": false, @@ -15381,9 +15743,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "expr": "sum by (status)(rate(loki_compactor_apply_retention_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{success}}", "legendLink": null } @@ -15391,7 +15752,7 @@ data: "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Compact and Mark Operations Per Status", + "title": "Mark Operations Per Status", "tooltip": { "shared": true, "sort": 2, @@ -15429,7 +15790,7 @@ data: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Compact and Mark", + "title": "Retention", "titleSize": "h6" }, { @@ -15443,7 +15804,7 @@ data: "dashes": 
false, "datasource": "$datasource", "fill": 10, - "id": 7, + "id": 11, "legend": { "avg": false, "current": false, @@ -15470,7 +15831,6 @@ data: { "expr": "count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{action}}", "legendLink": null } @@ -15518,7 +15878,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 8, + "id": 12, "legend": { "avg": false, "current": false, @@ -15545,7 +15905,6 @@ data: { "expr": "count by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\" , action=~\"modified|deleted\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{table}}-{{action}}", "legendLink": null } @@ -15593,7 +15952,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 9, + "id": 13, "legend": { "avg": false, "current": false, @@ -15620,7 +15979,6 @@ data: { "expr": "sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >0", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{table}}", "legendLink": null } @@ -15681,7 +16039,7 @@ data: "datasource": "$datasource", "fill": 1, "format": "short", - "id": 10, + "id": 14, "legend": { "avg": false, "current": false, @@ -15709,7 +16067,6 @@ data: "expr": "sum (increase(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -15756,7 +16113,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 11, + "id": 15, "legend": { "avg": false, "current": false, @@ -15783,21 +16140,18 @@ data: { "expr": "histogram_quantile(0.99, 
sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -15858,7 +16212,7 @@ data: "datasource": "$datasource", "fill": 1, "format": "short", - "id": 12, + "id": 16, "legend": { "avg": false, "current": false, @@ -15886,7 +16240,6 @@ data: "expr": "sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", "format": "time_series", "instant": true, - "intervalFactor": 2, "refId": "A" } ], @@ -15933,7 +16286,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 13, + "id": 17, "legend": { "avg": false, "current": false, @@ -15960,21 +16313,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, 
sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -16034,7 +16384,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 14, + "id": 18, "legend": { "avg": false, "current": false, @@ -16061,7 +16411,6 @@ data: { "expr": "time() - (loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time{cluster=~\"$cluster\", namespace=~\"$namespace\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "lag", "legendLink": null } @@ -16109,7 +16458,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 15, + "id": 19, "legend": { "avg": false, "current": false, @@ -16136,7 +16485,6 @@ data: { "expr": "sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "count", "legendLink": null } @@ -16184,7 +16532,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 16, + "id": 20, "legend": { "avg": false, "current": false, @@ -16211,7 +16559,6 @@ data: { "expr": "sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "legendLink": null } @@ -16266,7 
+16613,7 @@ data: "panels": [ { "datasource": "$loki_datasource", - "id": 17, + "id": 21, "span": 12, "targets": [ { @@ -16299,7 +16646,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -16484,21 +16831,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -16582,21 +16926,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"distributor\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -16669,7 +17010,6 @@ data: { "expr": "sum by(pod) 
(go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -16755,7 +17095,6 @@ data: { "expr": "sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -16840,21 +17179,18 @@ data: { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"cpu\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", "legendLink": null } @@ -16939,21 +17275,18 @@ data: { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null }, { "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", resource=\"memory\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "request", "legendLink": null }, { "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"} > 0)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "limit", 
"legendLink": null } @@ -17027,7 +17360,6 @@ data: { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}}", "legendLink": null } @@ -17101,7 +17433,6 @@ data: { "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -17177,7 +17508,6 @@ data: { "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", "legendLink": null } @@ -17253,7 +17583,6 @@ data: { "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"ingester.*.*\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null } @@ -17317,7 +17646,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -17454,6 +17783,7 @@ data: "3xx": "#6ED0E0", "4xx": 
"#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -17488,9 +17818,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/distributor\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -17565,7 +17894,6 @@ data: { "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th percentile", "refId": "A", "step": 10 @@ -17573,7 +17901,6 @@ data: { "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th percentile", "refId": "B", "step": 10 @@ -17581,7 +17908,6 @@ data: { "expr": "1e3 * sum(cluster_job:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}) / sum(cluster_job:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C", 
"step": 10 @@ -17631,6 +17957,166 @@ data: "title": "Distributor", "titleSize": "h6" }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval])) / sum(rate(loki_distributor_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "bytes", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per Total Received Bytes", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + 
"percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (tenant) (rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval])) / ignoring(tenant) group_left sum(rate(loki_distributor_structured_metadata_bytes_received_total{cluster=~\"$cluster\",job=~\"($namespace)/distributor\",}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{tenant}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Per Tenant", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": 1, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor - Structured Metadata", + "titleSize": "h6" + }, { "collapse": false, "height": "250px", @@ -17642,6 +18128,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -17651,7 +18138,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 5, "legend": { "avg": false, "current": false, @@ -17676,9 +18163,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", 
\"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -17726,7 +18212,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "id": 6, "legend": { "avg": false, "current": false, @@ -17753,7 +18239,6 @@ data: { "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th percentile", "refId": "A", "step": 10 @@ -17761,7 +18246,6 @@ data: { "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th percentile", "refId": "B", "step": 10 @@ -17769,7 +18253,6 @@ data: { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester-zone.*\", route=\"/logproto.Pusher/Push\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C", "step": 10 @@ -17830,6 +18313,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": 
"#E24D42", "success": "#7EB26D" @@ -17839,7 +18323,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 5, + "id": 7, "legend": { "avg": false, "current": false, @@ -17864,9 +18348,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -17914,7 +18397,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 6, + "id": 8, "legend": { "avg": false, "current": false, @@ -17941,7 +18424,6 @@ data: { "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th percentile", "refId": "A", "step": 10 @@ -17949,7 +18431,6 @@ data: { "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th percentile", "refId": "B", "step": 10 @@ -17957,7 +18438,6 @@ data: { "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"}) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\"})", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C", "step": 10 @@ -18018,6 +18498,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -18027,7 +18508,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 7, + "id": 9, "legend": { "avg": false, "current": false, @@ -18052,9 +18533,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -18102,7 +18582,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 8, + "id": 10, "legend": { "avg": false, "current": false, @@ -18129,21 +18609,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, 
sum(rate(loki_index_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_index_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval])) * 1e3 / sum(rate(loki_index_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester.*\", operation=\"index_chunk\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -18203,6 +18680,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -18212,7 +18690,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 9, + "id": 11, "legend": { "avg": false, "current": false, @@ -18237,9 +18715,8 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{status}}", "refId": "A" } @@ -18287,7 +18764,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 10, + "id": 12, "legend": { "avg": false, "current": false, @@ -18314,21 
+18791,18 @@ data: { "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "99th Percentile", "refId": "A" }, { "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", - "intervalFactor": 2, "legendFormat": "50th Percentile", "refId": "B" }, { "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", operation=\"WRITE\"}[$__rate_interval]))", "format": "time_series", - "intervalFactor": 2, "legendFormat": "Average", "refId": "C" } @@ -18391,7 +18865,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/loki-mixin/jsonnetfile.lock.json b/monitoring-mixins/loki-mixin/jsonnetfile.lock.json index d4545b3a..0f43adff 100644 --- a/monitoring-mixins/loki-mixin/jsonnetfile.lock.json +++ b/monitoring-mixins/loki-mixin/jsonnetfile.lock.json @@ -18,8 +18,8 @@ "subdir": "grafana-builder" } }, - "version": "197b22d7a053fe6d03a40f26abda064d4b717620", - "sum": "xEFMv4+ObwP5L1Wu0XK5agWci4AJzNApys6iKAQxLlQ=" + "version": "02db06f540086fa3f67d487bd01e1b314853fb8f", + "sum": "B49EzIY2WZsFxNMJcgRxE/gcZ9ltnS8pkOOV6Q5qioc=" }, { "source": { @@ -28,8 +28,8 @@ "subdir": "mixin-utils" } }, - "version": "197b22d7a053fe6d03a40f26abda064d4b717620", - "sum": "p5hRaq4GhUbgKZHtKqYmC8fgg8FoWJDkCN6PbonEXlk=" + "version": 
"02db06f540086fa3f67d487bd01e1b314853fb8f", + "sum": "PGf+vyCHqGxxS6SKNZiN3vR1xPnw6VOESXbeJrA5FaA=" }, { "source": { @@ -38,8 +38,8 @@ "subdir": "production/loki-mixin" } }, - "version": "7fa2f6ea9ef451a4f78a3db0612d6093266f620f", - "sum": "oVfDcrQXxmVMTsiHoZOjR904ahztzW06BdzQ8OI6tEU=" + "version": "afe146c88445b046d7fd2ac7e98ad490dda51018", + "sum": "IohgLDbRoSrvwmtvd5WkZjf06TxJDvEBSjlp+1s4VDg=" }, { "source": { @@ -48,8 +48,8 @@ "subdir": "operations/mimir-mixin" } }, - "version": "9bea6d63b965bd3eeda19e27898c160eca5618b5", - "sum": "U/xXSZ5ckWThjzI1KZSm0qWUpKnZ+55lYn3i14C8B9k=" + "version": "297e905ce8bb86382b5c50f9abbcf335e2b22244", + "sum": "TBKoXovJMzJ5M6vCrj1zser5ljWYuYXNy/IoUOZ3S/c=" }, { "source": { @@ -58,7 +58,7 @@ "subdir": "jsonnet/kube-prometheus/lib" } }, - "version": "b9d1ff5a8848bcedb465d60dd61775debb881534", + "version": "0cf56a9fb60a30102fb549add8afaa8d4e2e89ec", "sum": "QKRgrgEZ3k9nLmLCrDBaeIGVqQZf+AvZTcnhdLk3TrA=" } ], diff --git a/monitoring-mixins/loki-mixin/mixin.libsonnet b/monitoring-mixins/loki-mixin/mixin.libsonnet index c8753fc6..e4456e07 100644 --- a/monitoring-mixins/loki-mixin/mixin.libsonnet +++ b/monitoring-mixins/loki-mixin/mixin.libsonnet @@ -1,34 +1 @@ -local mixin = import 'loki-mixin/mixin.libsonnet'; - -mixin { - _config+:: { - // Tags for dashboards. - tags: ['loki'], - - // The label used to differentiate between different application instances (i.e. 'pod' in a kubernetes install). - per_instance_label: 'pod', - - // The label used to differentiate between different nodes (i.e. servers). - per_node_label: 'instance', - - // The label used to differentiate between different clusters. - per_cluster_label: 'cluster', - - // Enable dashboard and panels for Grafana Labs internal components. - internal_components: false, - - promtail: { - // Whether or not to include promtail specific dashboards - enabled: false, - }, - - // SSD related configuration for dashboards. - ssd: { - // Support Loki SSD mode on dashboards. 
- enabled: false, - - // The prefix used to match the write and read pods on SSD mode. - pod_prefix_matcher: '(loki|enterprise-logs)', - }, - }, -} +import 'loki-mixin/mixin.libsonnet' \ No newline at end of file diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet index a3342878..0bd0b339 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet @@ -109,7 +109,7 @@ value: datasource, }, hide: 0, - label: 'Data Source', + label: 'Data source', name: 'datasource', options: [], query: 'prometheus', @@ -289,7 +289,6 @@ legendLink: legendLink, expr: ql.q, format: 'time_series', - intervalFactor: 2, legendFormat: ql.l, } for ql in qsandls @@ -305,7 +304,6 @@ expr: query, format: 'time_series', instant: true, - intervalFactor: 2, refId: 'A', }, ], @@ -368,7 +366,6 @@ expr: qs[i], format: 'table', instant: true, - intervalFactor: 2, legendFormat: '', refId: std.char(65 + i), } @@ -430,6 +427,7 @@ '3xx': '#6ED0E0', '4xx': '#EF843C', '5xx': '#E24D42', + OK: '#7EB26D', success: '#7EB26D', 'error': '#E24D42', cancel: '#A9A9A9', @@ -441,10 +439,9 @@ sum by (status) ( label_replace(label_replace(rate(%s[$__rate_interval]), "status", "${1}xx", "%s", "([0-9]).."), - "status", "${1}", "%s", "([a-z]+)")) + "status", "${1}", "%s", "([a-zA-Z]+)")) ||| % [selector, statusLabelName, statusLabelName], format: 'time_series', - intervalFactor: 2, legendFormat: '{{status}}', refId: 'A', }, @@ -457,21 +454,18 @@ { expr: 'histogram_quantile(0.99, sum(rate(%s_bucket%s[$__rate_interval])) by (le)) * %s' % [metricName, selector, multiplier], format: 'time_series', - intervalFactor: 2, legendFormat: '99th Percentile', refId: 'A', }, { expr: 'histogram_quantile(0.50, 
sum(rate(%s_bucket%s[$__rate_interval])) by (le)) * %s' % [metricName, selector, multiplier], format: 'time_series', - intervalFactor: 2, legendFormat: '50th Percentile', refId: 'B', }, { expr: 'sum(rate(%s_sum%s[$__rate_interval])) * %s / sum(rate(%s_count%s[$__rate_interval]))' % [metricName, selector, multiplier, metricName, selector], format: 'time_series', - intervalFactor: 2, legendFormat: 'Average', refId: 'C', }, diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/mixin-utils/utils.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/mixin-utils/utils.libsonnet index 2e94e3d2..8113a989 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/mixin-utils/utils.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/jsonnet-libs/mixin-utils/utils.libsonnet @@ -66,7 +66,6 @@ local g = import 'grafana-builder/grafana.libsonnet'; sumBy: sumByHisto, }, format: 'time_series', - intervalFactor: 2, legendFormat: '%(legend)s99th percentile' % legend, refId: 'A', step: 10, @@ -80,7 +79,6 @@ local g = import 'grafana-builder/grafana.libsonnet'; sumBy: sumByHisto, }, format: 'time_series', - intervalFactor: 2, legendFormat: '%(legend)s50th percentile' % legend, refId: 'B', step: 10, @@ -94,7 +92,6 @@ local g = import 'grafana-builder/grafana.libsonnet'; sumBy: sumBy, }, format: 'time_series', - intervalFactor: 2, legendFormat: '%(legend)sAverage' % legend, refId: 'C', step: 10, diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/config.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/config.libsonnet index e0b09677..1fa22f56 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/config.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/config.libsonnet @@ -20,6 +20,9 @@ enabled: true, }, + // Enable TSDB 
specific dashboards + tsdb: true, + // SSD related configuration for dashboards. ssd: { // Support Loki SSD mode on dashboards. diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-logs.json b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-logs.json index 916a4acc..bcb5737a 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-logs.json +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-logs.json @@ -79,7 +79,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -166,7 +166,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -252,7 +252,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -338,7 +338,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -424,7 +424,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -510,7 +510,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -597,7 +597,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -684,7 +684,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -789,7 +789,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", diff --git 
a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-operational.json b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-operational.json index 1342dfc9..2dd944c2 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-operational.json +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/dashboard-loki-operational.json @@ -105,7 +105,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -200,7 +200,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -295,7 +295,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -389,7 +389,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -484,7 +484,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -579,7 +579,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -684,7 +684,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -789,7 +789,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -885,7 +885,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -993,7 +993,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { 
"buckets": null, "mode": "time", @@ -1089,7 +1089,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1197,7 +1197,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1302,7 +1302,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1398,7 +1398,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1505,7 +1505,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1601,7 +1601,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1701,7 +1701,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1895,7 +1895,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -1986,7 +1986,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2077,7 +2077,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2192,7 +2192,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2280,7 +2280,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2368,7 +2368,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2456,7 
+2456,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2559,7 +2559,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2650,7 +2650,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2741,7 +2741,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2856,7 +2856,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -2961,7 +2961,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3051,7 +3051,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3165,7 +3165,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3316,7 +3316,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3485,7 +3485,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3576,7 +3576,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3667,7 +3667,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3782,7 +3782,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3900,7 +3900,7 @@ "sort": 2, "value_type": "individual" 
}, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -3991,7 +3991,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4109,7 +4109,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4200,7 +4200,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4312,7 +4312,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4414,7 +4414,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4516,7 +4516,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4613,7 +4613,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4702,7 +4702,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4791,7 +4791,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4880,7 +4880,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -4969,7 +4969,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5087,7 +5087,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5178,7 +5178,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", 
"xaxis": { "buckets": null, "mode": "time", @@ -5264,7 +5264,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5278,7 +5278,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5349,7 +5349,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5363,7 +5363,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5434,7 +5434,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5448,7 +5448,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5519,7 +5519,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5533,7 +5533,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5604,17 +5604,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", 
namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".5", "refId": "C" } @@ -5629,7 +5629,7 @@ "sort": 0, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5703,19 +5703,19 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": 
"histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" @@ -5731,7 +5731,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5805,7 +5805,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" @@ -5821,7 +5821,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -5938,7 +5938,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6028,7 +6028,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6145,7 +6145,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6235,7 +6235,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6324,19 +6324,19 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": 
".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.9, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.5, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" @@ -6352,7 +6352,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6426,7 +6426,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_cassandra_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "expr": "sum(rate(loki_cassandra_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" @@ -6442,7 +6442,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6559,7 +6559,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", @@ -6649,7 +6649,7 @@ "sort": 2, "value_type": "individual" }, - "type": "graph", + "type": "timeseries", "xaxis": { "buckets": null, "mode": "time", diff --git 
a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-chunks.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-chunks.libsonnet index 3aa1e714..99a1fa06 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-chunks.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-chunks.libsonnet @@ -63,7 +63,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('Flush Stats') .addPanel( $.panel('Queue Length') + - $.queryPanel('cortex_ingester_flush_queue_length{%s}' % dashboards['loki-chunks.json'].labelsSelector, '{{pod}}'), + $.queryPanel('loki_ingester_flush_queue_length{%(label)s} or cortex_ingester_flush_queue_length{%(label)s}' % { label: dashboards['loki-chunks.json'].labelsSelector }, '{{pod}}'), ) .addPanel( $.panel('Flush Rate') + diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads-resources.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads-resources.libsonnet index 4e54760b..3d17903c 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads-resources.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads-resources.libsonnet @@ -139,7 +139,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addPanel( $.panel('Rules') + $.queryPanel( - 'sum by(%s) (cortex_prometheus_rule_group_rules{%s})' % [$._config.per_instance_label, $.jobMatcher('ruler')], + 'sum by(%(label)s) (loki_prometheus_rule_group_rules{%(matcher)s}) or sum by(%(label)s) (cortex_prometheus_rule_group_rules{%(matcher)s})' % { label: $._config.per_instance_label, matcher: $.jobMatcher('ruler') }, '{{%s}}' % 
$._config.per_instance_label ), ) diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads.libsonnet index 136a041e..538cade4 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-reads.libsonnet @@ -28,13 +28,16 @@ local utils = import 'mixin-utils/utils.libsonnet'; }, ], fieldConfig+: { - custom+: { - fillOpacity: 50, - showPoints: 'never', - stacking: { - group: 'A', - mode: 'normal', + defaults+: { + custom+: { + fillOpacity: 50, + showPoints: 'never', + stacking: { + group: 'A', + mode: 'normal', + }, }, + unit: 's', }, }, }, diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-retention.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-retention.libsonnet index 8e28ccdb..a5aa45a1 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-retention.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-retention.libsonnet @@ -25,20 +25,42 @@ local utils = import 'mixin-utils/utils.libsonnet'; ) .addRow( - $.row('Compact and Mark') + $.row('Compaction') .addPanel( - $.fromNowPanel('Last Compact and Mark Operation Success', 'loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds') + $.fromNowPanel('Last Compact Tables Operation Success', 'loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds') ) .addPanel( - $.panel('Compact and Mark Operations Duration') + + $.panel('Compact Tables Operations Duration') + 
$.queryPanel(['loki_boltdb_shipper_compact_tables_operation_duration_seconds{%s}' % $.namespaceMatcher()], ['duration']) + { yaxes: $.yaxes('s') }, ) + ) + .addRow( + $.row('') .addPanel( - $.panel('Compact and Mark Operations Per Status') + + $.panel('Number of times Tables were skipped during Compaction') + + $.queryPanel(['sum(increase(loki_compactor_skipped_compacting_locked_table_total{%s}[$__range]))' % $.namespaceMatcher()], ['{{table_name}}']), + ) + .addPanel( + $.panel('Compact Tables Operations Per Status') + $.queryPanel(['sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{%s}[$__rate_interval]))' % $.namespaceMatcher()], ['{{success}}']), ) ) + .addRow( + $.row('Retention') + .addPanel( + $.fromNowPanel('Last Mark Operation Success', 'loki_compactor_apply_retention_last_successful_run_timestamp_seconds') + ) + .addPanel( + $.panel('Mark Operations Duration') + + $.queryPanel(['loki_compactor_apply_retention_operation_duration_seconds{%s}' % $.namespaceMatcher()], ['duration']) + + { yaxes: $.yaxes('s') }, + ) + .addPanel( + $.panel('Mark Operations Per Status') + + $.queryPanel(['sum by (status)(rate(loki_compactor_apply_retention_operation_total{%s}[$__rate_interval]))' % $.namespaceMatcher()], ['{{success}}']), + ) + ) .addRow( $.row('Per Table Marker') .addPanel( diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-writes.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-writes.libsonnet index df710bb0..a12f4f7c 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-writes.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/loki/production/loki-mixin/dashboards/loki-writes.libsonnet @@ -69,6 +69,24 @@ local utils = import 'mixin-utils/utils.libsonnet'; ) ) ) + .addRowIf( + $._config.tsdb, + $.row(if $._config.ssd.enabled then 'Write Path' 
else 'Distributor - Structured Metadata') + .addPanel( + $.panel('Per Total Received Bytes') + + $.queryPanel('sum (rate(loki_distributor_structured_metadata_bytes_received_total{%s}[$__rate_interval])) / sum(rate(loki_distributor_bytes_received_total{%s}[$__rate_interval]))' % [dashboards['loki-writes.json'].distributorSelector, dashboards['loki-writes.json'].distributorSelector], 'bytes') + ) + .addPanel( + $.panel('Per Tenant') + + $.queryPanel('sum by (tenant) (rate(loki_distributor_structured_metadata_bytes_received_total{%s}[$__rate_interval])) / ignoring(tenant) group_left sum(rate(loki_distributor_structured_metadata_bytes_received_total{%s}[$__rate_interval]))' % [dashboards['loki-writes.json'].distributorSelector, dashboards['loki-writes.json'].distributorSelector], '{{tenant}}') + { + stack: true, + yaxes: [ + { format: 'short', label: null, logBase: 1, max: 1, min: 0, show: true }, + { format: 'short', label: null, logBase: 1, max: 1, min: null, show: false }, + ], + }, + ) + ) .addRowIf( !$._config.ssd.enabled, $.row('Ingester - Zone Aware') diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/alerts.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/alerts.libsonnet index 029c7ae7..f222d4d9 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/alerts.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/alerts.libsonnet @@ -312,6 +312,21 @@ local utils = import 'mixin-utils/utils.libsonnet'; message: '%(product)s ingester %(alert_instance_variable)s in %(alert_aggregation_variables)s has ingested samples with timestamps more than 1h in the future.' 
% $._config, }, }, + { + alert: $.alertName('StoreGatewayTooManyFailedOperations'), + 'for': '5m', + expr: ||| + sum by(%(alert_aggregation_labels)s, operation) (rate(thanos_objstore_bucket_operation_failures_total{component="store-gateway"}[1m])) > 0 + ||| % { + alert_aggregation_labels: $._config.alert_aggregation_labels, + }, + labels: { + severity: 'warning', + }, + annotations: { + message: '%(product)s store-gateway %(alert_instance_variable)s in %(alert_aggregation_variables)s is experiencing {{ $value | humanizePercentage }} errors while doing {{ $labels.operation }} on the object storage.' % $._config, + }, + }, ] + [ { alert: $.alertName('RingMembersMismatch'), @@ -698,17 +713,47 @@ local utils = import 'mixin-utils/utils.libsonnet'; name: 'gossip_alerts', rules: [ { - alert: $.alertName('GossipMembersMismatch'), + // What's the purpose of this alert? We want to know if two databases' Memberlist clusters have merged. + // We do this by comparing the reported number of cluster members with the expected number of members based on the number of running pods in that namespace. + // If two Memberlist clusters have merged, then the reported number of members will be higher than the expected number. + // However, during rollouts, the number of reported cluster members can be higher than the expected number because it takes some time for the removal of old + // pods to be propagated to all members of the cluster, so we add a fudge factor of 10 extra members. + // This value is designed to be low enough that the alert will trigger if another cluster merges with this one (assuming that most clusters have more than 10 + // members), but high enough to not result in false positives during rollouts. + // We don't use a percentage because this would not be reliable: in a large Mimir cluster of 1000+ instances, even a small percentage like 5% would be 50 + // instances - too high to catch a small cluster merging with a big one. 
+ alert: $.alertName('GossipMembersTooHigh'), expr: ||| - avg by (%s) (memberlist_client_cluster_members_count) != sum by (%s) (up{%s=~".+/%s"}) + max by (%s) (memberlist_client_cluster_members_count) + > + (sum by (%s) (up{%s=~".+/%s"}) + 10) ||| % [$._config.alert_aggregation_labels, $._config.alert_aggregation_labels, $._config.per_job_label, simpleRegexpOpt($._config.job_names.ring_members)], - 'for': '15m', + 'for': '20m', + labels: { + severity: 'warning', + }, + annotations: { + message: 'One or more %(product)s instances in %(alert_aggregation_variables)s consistently sees a higher than expected number of gossip members.' % $._config, + }, + }, + { + // What's the purpose of this alert? We want to know if a cell has reached a split brain scenario. + // We do this by comparing the reported number of cluster members with the expected number of members based on the number of running pods in that namespace. + // If a split has occurred, then the reported number of members will be lower than the expected number. + alert: $.alertName('GossipMembersTooLow'), + expr: + ||| + min by (%s) (memberlist_client_cluster_members_count) + < + (sum by (%s) (up{%s=~".+/%s"}) * 0.5) + ||| % [$._config.alert_aggregation_labels, $._config.alert_aggregation_labels, $._config.per_job_label, simpleRegexpOpt($._config.job_names.ring_members)], + 'for': '20m', labels: { severity: 'warning', }, annotations: { - message: 'One or more %(product)s instances in %(alert_aggregation_variables)s see incorrect number of gossip members.' % $._config, + message: 'One or more %(product)s instances in %(alert_aggregation_variables)s consistently sees a lower than expected number of gossip members.' 
% $._config, }, }, ], diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/autoscaling.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/autoscaling.libsonnet index ef9a7de1..73d15b8a 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/autoscaling.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/autoscaling.libsonnet @@ -8,19 +8,23 @@ 'for': '1h', expr: ||| ( - kube_horizontalpodautoscaler_status_condition{condition="ScalingActive",status="false"} - # Match only Mimir namespaces. - * on(%(aggregation_labels)s) group_left max by(%(aggregation_labels)s) (cortex_build_info) - # Add "metric" label. - + on(%(aggregation_labels)s, horizontalpodautoscaler) group_right label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)") - > 0 + label_replace(( + kube_horizontalpodautoscaler_status_condition{condition="ScalingActive",status="false"} + # Match only Mimir namespaces. + * on(%(aggregation_labels)s) group_left max by(%(aggregation_labels)s) (cortex_build_info) + # Add "metric" label. + + on(%(aggregation_labels)s, horizontalpodautoscaler) group_right label_replace(kube_horizontalpodautoscaler_spec_target_metric*0, "metric", "$1", "metric_name", "(.+)") + > 0), + "scaledObject", "$1", "horizontalpodautoscaler", "%(hpa_prefix)s(.*)" + ) ) # Alert only if the scaling metric exists and is > 0. If the KEDA ScaledObject is configured to scale down 0, # then HPA ScalingActive may be false when expected to run 0 replicas. In this case, the scaling metric exported # by KEDA could not exist at all or being exposed with a value of 0. 
- and on (%(aggregation_labels)s, metric) - (label_replace(keda_metrics_adapter_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0) + and on (%(aggregation_labels)s, metric, scaledObject) + (label_replace(keda_scaler_metrics_value, "namespace", "$0", "exported_namespace", ".+") > 0) ||| % { + hpa_prefix: $._config.autoscaling_hpa_prefix, aggregation_labels: $._config.alert_aggregation_labels, }, labels: { @@ -36,7 +40,7 @@ expr: ||| ( # Find KEDA scalers reporting errors. - label_replace(rate(keda_metrics_adapter_scaler_errors[5m]), "namespace", "$1", "exported_namespace", "(.*)") + label_replace(rate(keda_scaler_errors[5m]), "namespace", "$1", "exported_namespace", "(.*)") # Match only Mimir namespaces. * on(%(aggregation_labels)s) group_left max by(%(aggregation_labels)s) (cortex_build_info) ) diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/blocks.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/blocks.libsonnet index d2404f63..2c969bf2 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/blocks.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/blocks.libsonnet @@ -183,22 +183,6 @@ message: '%(product)s Ingester %(alert_instance_variable)s in %(alert_aggregation_variables)s is failing to write to TSDB WAL.' % $._config, }, }, - { - // Alert if the querier is not successfully scanning the bucket. 
- alert: $.alertName('QuerierHasNotScanTheBucket'), - 'for': '5m', - expr: ||| - (time() - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 60 * 30) - and - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 0 - |||, - labels: { - severity: 'critical', - }, - annotations: { - message: '%(product)s Querier %(alert_instance_variable)s in %(alert_aggregation_variables)s has not successfully scanned the bucket since {{ $value | humanizeDuration }}.' % $._config, - }, - }, { // Alert if the store-gateway is not successfully synching the bucket. alert: $.alertName('StoreGatewayHasNotSyncTheBucket'), diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/compactor.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/compactor.libsonnet index d29d09a9..79e5a5f5 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/compactor.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/compactor.libsonnet @@ -110,32 +110,32 @@ }, }, { - // Alert if compactor has tried to compact blocks with out-of-order chunks. - alert: $.alertName('CompactorSkippedBlocksWithOutOfOrderChunks'), + // Alert if compactor has tried to compact unhealthy blocks. + alert: $.alertName('CompactorSkippedUnhealthyBlocks'), 'for': '1m', expr: ||| - increase(cortex_compactor_blocks_marked_for_no_compaction_total{reason="block-index-out-of-order-chunk"}[5m]) > 0 + increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 0 |||, labels: { severity: 'warning', }, annotations: { - message: '%(product)s Compactor %(alert_instance_variable)s in %(alert_aggregation_variables)s has found and ignored blocks with out of order chunks.' % $._config, + message: '%(product)s Compactor %(alert_instance_variable)s in %(alert_aggregation_variables)s has found and ignored unhealthy blocks.' 
% $._config, }, }, { - // Alert if compactor has tried to compact blocks with out-of-order chunks. + // Alert if compactor has tried to compact unhealthy blocks. // Any number greater than 1 over the last 30 minutes should be investigated quickly as it could start to impact the read path. - alert: $.alertName('CompactorSkippedBlocksWithOutOfOrderChunks'), + alert: $.alertName('CompactorSkippedUnhealthyBlocks'), 'for': '30m', expr: ||| - increase(cortex_compactor_blocks_marked_for_no_compaction_total{reason="block-index-out-of-order-chunk"}[5m]) > 1 + increase(cortex_compactor_blocks_marked_for_no_compaction_total[5m]) > 1 |||, labels: { severity: 'critical', }, annotations: { - message: '%(product)s Compactor %(alert_instance_variable)s in %(alert_aggregation_variables)s has found and ignored blocks with out of order chunks.' % $._config, + message: '%(product)s Compactor %(alert_instance_variable)s in %(alert_aggregation_variables)s has found and ignored unhealthy blocks.' % $._config, }, }, ], diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet index ea48996a..63504168 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet @@ -70,7 +70,7 @@ // docs/sources/mimir/manage/monitoring-grafana-mimir/requirements.md job_names: { ingester: ['ingester.*', 'cortex', 'mimir', 'mimir-write.*'], // Match also custom and per-zone ingester deployments. - distributor: ['distributor', 'cortex', 'mimir', 'mimir-write.*'], + distributor: ['distributor.*', 'cortex', 'mimir', 'mimir-write.*'], // Match also per-zone distributor deployments. querier: ['querier.*', 'cortex', 'mimir', 'mimir-read.*'], // Match also custom querier deployments. 
ruler_querier: ['ruler-querier.*'], // Match also custom querier deployments. ruler: ['ruler', 'cortex', 'mimir', 'mimir-backend.*'], @@ -78,15 +78,15 @@ ruler_query_frontend: ['ruler-query-frontend.*'], // Match also custom ruler-query-frontend deployments. query_scheduler: ['query-scheduler.*', 'mimir-backend.*'], // Not part of single-binary. Match also custom query-scheduler deployments. ruler_query_scheduler: ['ruler-query-scheduler.*'], // Not part of single-binary. Match also custom query-scheduler deployments. - ring_members: ['admin-api', 'alertmanager', 'compactor.*', 'distributor', 'ingester.*', 'querier.*', 'ruler', 'ruler-querier.*', 'store-gateway.*', 'cortex', 'mimir', 'mimir-write.*', 'mimir-read.*', 'mimir-backend.*'], + ring_members: ['admin-api', 'alertmanager', 'compactor.*', 'distributor.*', 'ingester.*', 'querier.*', 'ruler', 'ruler-querier.*', 'store-gateway.*', 'cortex', 'mimir', 'mimir-write.*', 'mimir-read.*', 'mimir-backend.*'], store_gateway: ['store-gateway.*', 'cortex', 'mimir', 'mimir-backend.*'], // Match also per-zone store-gateway deployments. - gateway: ['gateway', 'cortex-gw', 'cortex-gw-internal'], + gateway: ['gateway', 'cortex-gw.*'], // Match also custom and per-zone gateway deployments. compactor: ['compactor.*', 'cortex', 'mimir', 'mimir-backend.*'], // Match also custom compactor deployments. alertmanager: ['alertmanager', 'cortex', 'mimir', 'mimir-backend.*'], overrides_exporter: ['overrides-exporter', 'mimir-backend.*'], // The following are job matchers used to select all components in a given "path". 
- write: ['distributor', 'ingester.*', 'mimir-write.*'], + write: ['distributor.*', 'ingester.*', 'mimir-write.*'], read: ['query-frontend.*', 'querier.*', 'ruler-query-frontend.*', 'ruler-querier.*', 'mimir-read.*'], backend: ['ruler', 'query-scheduler.*', 'ruler-query-scheduler.*', 'store-gateway.*', 'compactor.*', 'alertmanager', 'overrides-exporter', 'mimir-backend.*'], }, @@ -593,35 +593,37 @@ }, // Whether autoscaling panels and alerts should be enabled for specific Mimir services. + autoscaling_hpa_prefix: 'keda-hpa-', + autoscaling: { query_frontend: { enabled: false, - hpa_name: 'keda-hpa-query-frontend', + hpa_name: $._config.autoscaling_hpa_prefix + 'query-frontend', }, ruler_query_frontend: { enabled: false, - hpa_name: 'keda-hpa-ruler-query-frontend', + hpa_name: $._config.autoscaling_hpa_prefix + 'ruler-query-frontend', }, querier: { enabled: false, // hpa_name can be a regexp to support multiple querier deployments, like "keda-hpa-querier(-burst(-backup)?)?". - hpa_name: 'keda-hpa-querier', + hpa_name: $._config.autoscaling_hpa_prefix + 'querier', }, ruler_querier: { enabled: false, - hpa_name: 'keda-hpa-ruler-querier', + hpa_name: $._config.autoscaling_hpa_prefix + 'ruler-querier', }, distributor: { enabled: false, - hpa_name: 'keda-hpa-distributor', + hpa_name: $._config.autoscaling_hpa_prefix + 'distributor', }, ruler: { enabled: false, - hpa_name: 'keda-hpa-ruler', + hpa_name: $._config.autoscaling_hpa_prefix + 'ruler', }, gateway: { enabled: false, - hpa_name: 'keda-hpa-cortex-gw.*', + hpa_name: $._config.autoscaling_hpa_prefix + 'cortex-gw.*', }, }, @@ -646,12 +648,12 @@ // Used to add additional services to dashboards that support it. extraServiceNames: [], - // When using rejecting inflight requests in ingesters early (using -ingester.limit-inflight-requests-using-grpc-method-limiter option), - // rejected requests will not count towards standard Mimir metrics like cortex_request_duration_seconds_count. 
- // Enabling this will make them visible on the dashboard again. + // When using early rejection of inflight requests in ingesters and distributors (using -ingester.limit-inflight-requests-using-grpc-method-limiter + // and -distributor.limit-inflight-requests-using-grpc-method-limiter options), rejected requests will not count towards standard Mimir metrics + // like cortex_request_duration_seconds_count. Enabling this will make them visible on the dashboard again. // - // Disabled by default, because when -ingester.limit-inflight-requests-using-grpc-method-limiter is not used (default), then rejected requests - // are already counted as failures. + // Disabled by default, because when -ingester.limit-inflight-requests-using-grpc-method-limiter and -distributor.limit-inflight-requests-using-grpc-method-limiter is + // not used (default), then rejected requests are already counted as failures. show_rejected_requests_on_writes_dashboard: false, }, } diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet index 7958e214..e3e5afe3 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet @@ -526,10 +526,21 @@ local utils = import 'mixin-utils/utils.libsonnet'; hpa_name: $._config.autoscaling[field].hpa_name, cluster_labels: std.join(', ', $._config.cluster_labels), }, + ||| + kube_horizontalpodautoscaler_spec_min_replicas{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"} + # Add the scaletargetref_name label for readability + + on (%(cluster_labels)s, horizontalpodautoscaler) group_left (scaletargetref_name) + 
0*kube_horizontalpodautoscaler_info{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"} + ||| % { + namespace_matcher: $.namespaceMatcher(), + hpa_name: $._config.autoscaling[field].hpa_name, + cluster_labels: std.join(', ', $._config.cluster_labels), + }, ], [ 'Max {{ scaletargetref_name }}', 'Current {{ scaletargetref_name }}', + 'Min {{ scaletargetref_name }}', ], ) + $.panelDescription( @@ -551,6 +562,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; alias: '/Current .+/', fill: 0, }, + { + alias: '/Min .+/', + dashes: true, + fill: 0, + }, ], }, ) @@ -560,20 +576,29 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.queryPanel( [ ||| - sum by (scaledObject) ( - keda_metrics_adapter_scaler_metrics_value{metric=~".*cpu.*"} + sum by (scaler) ( + label_replace( + keda_scaler_metrics_value{%(cluster_label)s=~"$cluster", exported_namespace=~"$namespace", scaler=~".*cpu.*"}, + "namespace", "$1", "exported_namespace", "(.*)" + ) / - on(metric) group_left label_replace( + on(%(aggregation_labels)s, scaledObject, metric) group_left label_replace( + label_replace( kube_horizontalpodautoscaler_spec_target_metric{%(namespace)s, horizontalpodautoscaler=~"%(hpa_name)s"}, "metric", "$1", "metric_name", "(.+)" + ), + "scaledObject", "$1", "horizontalpodautoscaler", "%(hpa_prefix)s(.*)" ) ) ||| % { + aggregation_labels: $._config.alert_aggregation_labels, + cluster_label: $._config.per_cluster_label, + hpa_prefix: $._config.autoscaling_hpa_prefix, hpa_name: $._config.autoscaling[field].hpa_name, namespace: $.namespaceMatcher(), }, ], [ - '{{ scaledObject }}', + '{{ scaler }}', ] ) + $.panelDescription( @@ -590,20 +615,29 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.queryPanel( [ ||| - sum by (scaledObject) ( - keda_metrics_adapter_scaler_metrics_value{metric=~".*memory.*"} + sum by (scaler) ( + label_replace( + keda_scaler_metrics_value{%(cluster_label)s=~"$cluster", exported_namespace=~"$namespace", scaler=~".*memory.*"}, + "namespace", 
"$1", "exported_namespace", "(.*)" + ) / - on(metric) group_left label_replace( + on(%(aggregation_labels)s, scaledObject, metric) group_left label_replace( + label_replace( kube_horizontalpodautoscaler_spec_target_metric{%(namespace)s, horizontalpodautoscaler=~"%(hpa_name)s"}, "metric", "$1", "metric_name", "(.+)" + ), + "scaledObject", "$1", "horizontalpodautoscaler", "%(hpa_prefix)s(.*)" ) ) ||| % { + aggregation_labels: $._config.alert_aggregation_labels, + cluster_label: $._config.per_cluster_label, + hpa_prefix: $._config.autoscaling_hpa_prefix, hpa_name: $._config.autoscaling[field].hpa_name, namespace: $.namespaceMatcher(), }, ], [ - '{{ scaledObject }}', + '{{ scaler }}', ] ) + $.panelDescription( @@ -618,8 +652,8 @@ local utils = import 'mixin-utils/utils.libsonnet'; local title = 'Autoscaler failures rate'; $.panel(title) + $.queryPanel( - $.filterKedaMetricByHPA('sum by(metric) (rate(keda_metrics_adapter_scaler_errors[$__rate_interval]))', $._config.autoscaling[field].hpa_name), - '{{metric}} failures' + $.filterKedaScalerErrorsByHPA($._config.autoscaling[field].hpa_name), + '{{scaler}} failures' ) + $.panelDescription( title, @@ -1069,18 +1103,27 @@ local utils = import 'mixin-utils/utils.libsonnet'; namespaceMatcher: $.namespaceMatcher(), }, - filterKedaMetricByHPA(query, hpa_name):: + filterKedaScalerErrorsByHPA(hpa_name):: ||| - %(query)s + - on(metric) group_left + sum by(%(aggregation_labels)s, scaler, metric, scaledObject) ( + label_replace( + rate(keda_scaler_errors[$__rate_interval]), + "namespace", "$1", "exported_namespace", "(.+)" + ) + ) + + on(%(aggregation_labels)s, metric, scaledObject) group_left label_replace( - kube_horizontalpodautoscaler_spec_target_metric{%(namespace)s, horizontalpodautoscaler=~"%(hpa_name)s"} - * 0, "metric", "$1", "metric_name", "(.+)" + label_replace( + kube_horizontalpodautoscaler_spec_target_metric{%(namespace)s, horizontalpodautoscaler=~"%(hpa_name)s"} * 0, + "scaledObject", "$1", "horizontalpodautoscaler", 
"%(hpa_prefix)s(.*)" + ), + "metric", "$1", "metric_name", "(.+)" ) ||| % { - query: query, hpa_name: hpa_name, + hpa_prefix: $._config.autoscaling_hpa_prefix, namespace: $.namespaceMatcher(), + aggregation_labels: $._config.alert_aggregation_labels, }, // panelAxisPlacement allows to place a series on the right axis. @@ -1163,4 +1206,10 @@ local utils = import 'mixin-utils/utils.libsonnet'; replaceFields: replaceFields, }), + lokiMetricsQueryPanel(queries, legends='', unit='short'):: + super.queryPanel(queries, legends) + + { + datasource: '${lokidatasource}', + yaxes: $.yaxes(unit), + }, } diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/queries.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/queries.libsonnet index d0e79a0e..7189c8a1 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/queries.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/queries.libsonnet @@ -181,7 +181,13 @@ local filename = 'mimir-queries.json'; .addPanel( $.panel('Consistency checks failed') + $.failurePanel('sum(rate(cortex_querier_blocks_consistency_checks_failed_total{%s}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{%s}[$__rate_interval]))' % [$.jobMatcher($._config.job_names.querier), $.jobMatcher($._config.job_names.querier)], 'Failure Rate') + - { yaxes: $.yaxes({ format: 'percentunit', max: 1 }) }, + { yaxes: $.yaxes({ format: 'percentunit', max: 1 }) } + + $.panelDescription( + 'Consistency checks failed', + ||| + Rate of queries that had to run with consistency checks and those checks failed. A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways. 
+ ||| + ), ) .addPanel( $.panel('Rejected queries') + diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/reads.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/reads.libsonnet index 662975f3..3bc6e25f 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/reads.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/reads.libsonnet @@ -288,10 +288,21 @@ local filename = 'mimir-reads.json'; cluster_labels: std.join(', ', $._config.cluster_labels), hpa_name: $._config.autoscaling.querier.hpa_name, }, + ||| + kube_horizontalpodautoscaler_spec_min_replicas{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"} + # Add the scaletargetref_name label which is more readable than "kube-hpa-..." + + on (%(cluster_labels)s, horizontalpodautoscaler) group_left (scaletargetref_name) + 0*kube_horizontalpodautoscaler_info{%(namespace_matcher)s, horizontalpodautoscaler=~"%(hpa_name)s"} + ||| % { + namespace_matcher: $.namespaceMatcher(), + cluster_labels: std.join(', ', $._config.cluster_labels), + hpa_name: $._config.autoscaling.querier.hpa_name, + }, ], [ 'Max {{ scaletargetref_name }}', 'Current {{ scaletargetref_name }}', + 'Min {{ scaletargetref_name }}', ], ) + $.panelDescription( @@ -313,6 +324,11 @@ local filename = 'mimir-reads.json'; alias: '/Current .+/', fill: 0, }, + { + alias: '/Min .+/', + dashes: true, + fill: 0, + }, ], } ) @@ -322,18 +338,27 @@ local filename = 'mimir-reads.json'; $.queryPanel( [ ||| - sum by (scaledObject) ( - keda_metrics_adapter_scaler_metrics_value - / - on(metric) group_left + sum by (scaler) ( label_replace( - kube_horizontalpodautoscaler_spec_target_metric{%s, horizontalpodautoscaler=~"%s"}, - "metric", "$1", "metric_name", "(.+)" + keda_scaler_metrics_value{%(cluster_label)s=~"$cluster", 
exported_namespace=~"$namespace"}, + "namespace", "$1", "exported_namespace", "(.*)" ) + / + on(%(aggregation_labels)s, scaledObject, metric) group_left + label_replace(label_replace( + kube_horizontalpodautoscaler_spec_target_metric{%(namespace)s, horizontalpodautoscaler=~"%(hpa_name)s"}, + "metric", "$1", "metric_name", "(.+)" + ), "scaledObject", "$1", "horizontalpodautoscaler", "%(hpa_prefix)s(.*)") ) - ||| % [$.namespaceMatcher(), $._config.autoscaling.querier.hpa_name], + ||| % { + aggregation_labels: $._config.alert_aggregation_labels, + cluster_label: $._config.per_cluster_label, + hpa_prefix: $._config.autoscaling_hpa_prefix, + hpa_name: $._config.autoscaling.querier.hpa_name, + namespace: $.namespaceMatcher(), + }, ], [ - '{{ scaledObject }}', + '{{ scaler }}', ] ) + $.panelDescription( @@ -349,8 +374,8 @@ local filename = 'mimir-reads.json'; local title = 'Autoscaler failures rate'; $.panel(title) + $.queryPanel( - $.filterKedaMetricByHPA('sum by(metric) (rate(keda_metrics_adapter_scaler_errors[$__rate_interval]))', $._config.autoscaling.querier.hpa_name), - '{{metric}} failures' + $.filterKedaScalerErrorsByHPA($._config.autoscaling.querier.hpa_name), + '{{scaler}} failures' ) + $.panelDescription( title, diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/remote-ruler-reads.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/remote-ruler-reads.libsonnet index 9c5ec298..11648274 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/remote-ruler-reads.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/remote-ruler-reads.libsonnet @@ -99,99 +99,6 @@ local filename = 'mimir-remote-ruler-reads.json'; ) .addRowIf( $._config.autoscaling.ruler_querier.enabled, - $.row('Querier (dedicated to ruler) - autoscaling') - .addPanel( - local title 
= 'Replicas'; - $.panel(title) + - $.queryPanel( - [ - 'kube_horizontalpodautoscaler_spec_min_replicas{%s, horizontalpodautoscaler="%s"}' % [$.namespaceMatcher(), $._config.autoscaling.ruler_querier.hpa_name], - 'kube_horizontalpodautoscaler_spec_max_replicas{%s, horizontalpodautoscaler="%s"}' % [$.namespaceMatcher(), $._config.autoscaling.ruler_querier.hpa_name], - 'kube_horizontalpodautoscaler_status_current_replicas{%s, horizontalpodautoscaler="%s"}' % [$.namespaceMatcher(), $._config.autoscaling.ruler_querier.hpa_name], - ], - [ - 'Min', - 'Max', - 'Current', - ], - ) + - $.panelDescription( - title, - ||| - The minimum, maximum, and current number of querier replicas. - ||| - ), - ) - .addPanel( - local title = 'Scaling metric (CPU): Desired replicas'; - $.panel(title) + - $.queryPanel( - [ - ||| - keda_metrics_adapter_scaler_metrics_value{metric!~".*memory.*"} - / - on(metric) group_left label_replace( - kube_horizontalpodautoscaler_spec_target_metric{%(namespace)s, horizontalpodautoscaler=~"%(hpa_name)s"}, - "metric", "$1", "metric_name", "(.+)" - ) - ||| % { - hpa_name: $._config.autoscaling.ruler_querier.hpa_name, - namespace: $.namespaceMatcher(), - }, - ], [ - '{{ scaledObject }}', - ] - ) + - $.panelDescription( - title, - ||| - This panel shows the scaling metric exposed by KEDA divided by the target/threshold used. - It should represent the desired number of replicas, ignoring the min/max constraints applied later. 
- ||| - ), - ) - .addPanel( - local title = 'Scaling metric (memory): Desired replicas'; - $.panel(title) + - $.queryPanel( - [ - ||| - keda_metrics_adapter_scaler_metrics_value{metric=~".*memory.*"} - / - on(metric) group_left label_replace( - kube_horizontalpodautoscaler_spec_target_metric{%(namespace)s, horizontalpodautoscaler=~"%(hpa_name)s"}, - "metric", "$1", "metric_name", "(.+)" - ) - ||| % { - hpa_name: $._config.autoscaling.ruler_querier.hpa_name, - namespace: $.namespaceMatcher(), - }, - ], [ - '{{ scaledObject }}', - ] - ) + - $.panelDescription( - title, - ||| - This panel shows the scaling metric exposed by KEDA divided by the target/threshold used. - It should represent the desired number of replicas, ignoring the min/max constraints applied later. - ||| - ), - ) - .addPanel( - local title = 'Autoscaler failures rate'; - $.panel(title) + - $.queryPanel( - $.filterKedaMetricByHPA('sum by(metric) (rate(keda_metrics_adapter_scaler_errors[$__rate_interval]))', $._config.autoscaling.ruler_querier.hpa_name), - '{{metric}} failures' - ) + - $.panelDescription( - title, - ||| - The rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom - metrics server is unable to query the scaling metric from Prometheus so the autoscaler does not work properly. 
- ||| - ), - ) + $.cpuAndMemoryBasedAutoScalingRow('Ruler-Querier'), ), } diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/rollout-progress.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/rollout-progress.libsonnet index 69ba7f4d..b019afa8 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/rollout-progress.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/rollout-progress.libsonnet @@ -382,7 +382,7 @@ local filename = 'mimir-rollout-progress.json'; // // Performance comparison with 24h ago // - $.panel('Latency vs 24h ago') + + $.timeseriesPanel('Latency vs 24h ago') + $.queryPanel([||| 1 - ( avg_over_time(histogram_quantile(0.99, sum by (le) (%(per_cluster_label)s_job_route:cortex_request_duration_seconds_bucket:sum_rate{%(write_job_matcher)s, route=~"%(write_http_routes_regex)s"} offset 24h))[1h:]) @@ -395,12 +395,18 @@ local filename = 'mimir-rollout-progress.json'; / avg_over_time(histogram_quantile(0.99, sum by (le) (%(per_cluster_label)s_job_route:cortex_request_duration_seconds_bucket:sum_rate{%(read_job_matcher)s, route=~"%(read_http_routes_regex)s"}))[1h:]) ) - ||| % config], ['writes', 'reads']) + { - yaxes: $.yaxes({ - format: 'percentunit', - min: null, // Can be negative. 
- }), - + ||| % config], ['writes', 'reads']) + + { + fieldConfig: { + defaults: { + unit: 'percentunit', + custom: { + fillOpacity: 10, + }, + }, + }, + } + + { id: 12, gridPos: { h: 8, w: 8, x: 16, y: 8 }, }, diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/ruler.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/ruler.libsonnet index bc5066a5..0819f8b0 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/ruler.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/ruler.libsonnet @@ -137,7 +137,13 @@ local filename = 'mimir-ruler.json'; .addPanel( $.panel('Consistency checks failed') + $.failurePanel('sum(rate(cortex_querier_blocks_consistency_checks_failed_total{%s}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{%s}[$__rate_interval]))' % [$.jobMatcher($._config.job_names.ruler), $.jobMatcher($._config.job_names.ruler)], 'Failures / sec') + - { yaxes: $.yaxes({ format: 'percentunit', max: 1 }) }, + { yaxes: $.yaxes({ format: 'percentunit', max: 1 }) } + + $.panelDescription( + 'Consistency checks failed', + ||| + Rate of queries that had to run with consistency checks and those checks failed. A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways. 
+ ||| + ), ) ) .addRow( diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/slow-queries.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/slow-queries.libsonnet index 9f686950..466e1efd 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/slow-queries.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/slow-queries.libsonnet @@ -5,10 +5,154 @@ local filename = 'mimir-slow-queries.json'; [filename]: ($.dashboard('Slow queries') + { uid: std.md5(filename) }) .addClusterSelectorTemplates(false) + .addRow( + $.row('Accross tenants') + .addPanel( + $.panel('Response time') + + $.lokiMetricsQueryPanel( + [ + 'quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + 'quantile_over_time(0.5, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + ], + ['p99', 'p50'], + unit='s', + ) + ) + .addPanel( + $.panel('Fetched series') + + $.lokiMetricsQueryPanel( + [ + 'quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + 'quantile_over_time(0.5, 
{%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + ], + ['p99', 'p50'], + ) + ) + .addPanel( + $.panel('Fetched chunks') + + $.lokiMetricsQueryPanel( + [ + 'quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + 'quantile_over_time(0.5, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + ], + ['p99', 'p50'], + unit='bytes', + ) + ) + .addPanel( + $.panel('Response size') + + $.lokiMetricsQueryPanel( + [ + 'quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + 'quantile_over_time(0.5, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + ], + ['p99', 'p50'], + unit='bytes', + ) + ) + .addPanel( + $.panel('Time span') + + $.lokiMetricsQueryPanel( + [ + 'quantile_over_time(0.99, 
{%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + 'quantile_over_time(0.5, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + ], + ['p99', 'p50'], + unit='s', + ) + ) + ) + .addRow( + $.row('Top 10 tenants') { collapse: true } + .addPanel( + $.panel('P99 response time') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user}}', + unit='s', + ) + ) + .addPanel( + $.panel('P99 fetched series') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user}}', + ) + ) + .addPanel( + $.panel('P99 fetched chunks') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))' % 
[$._config.per_cluster_label, $._config.per_namespace_label], + '{{user}}', + unit='bytes', + ) + ) + .addPanel( + $.panel('P99 response size') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user}}', + unit='bytes', + ) + ) + .addPanel( + $.panel('P99 time span') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user}}', + unit='s', + ) + ) + ) + .addRow( + ( + $.row('Top 10 User-Agents') { collapse: true } + .addPanel( + $.panel('P99 response time') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user_agent}}', + unit='s', + ) + ) + .addPanel( + $.panel('P99 fetched series') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user_agent}}', + ) + ) + 
.addPanel( + $.panel('P99 fetched chunks') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user_agent}}', + unit='bytes', + ) + ) + .addPanel( + $.panel('P99 response size') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user_agent}}', + unit='bytes', + ) + ) + .addPanel( + $.panel('P99 time span') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user_agent}}', + unit='s', + ) + ) + ) + ) .addRow( $.row('') .addPanel( { + height: '500px', title: 'Slow queries', type: 'table', datasource: '${lokidatasource}', @@ -17,7 +161,7 @@ local filename = 'mimir-slow-queries.json'; targets: [ { // Filter out the remote read endpoint. 
- expr: '{%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | response_time > ${min_duration}' % [$._config.per_cluster_label, $._config.per_namespace_label], + expr: '{%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration}' % [$._config.per_cluster_label, $._config.per_namespace_label], instant: false, legendFormat: '', range: true, @@ -34,27 +178,11 @@ local filename = 'mimir-slow-queries.json'; source: 'labels', }, }, - { - // Compute the query time range. - id: 'calculateField', - options: { - alias: 'Time range', - mode: 'binary', - binary: { - left: 'param_end', - operator: '-', - reducer: 'sum', - right: 'param_start', - }, - reduce: { reducer: 'sum' }, - replaceFields: false, - }, - }, { id: 'organize', options: { // Hide fields we don't care. - local hiddenFields = ['caller', 'cluster', 'container', 'host', 'id', 'job', 'level', 'line', 'method', 'msg', 'name', 'namespace', 'param_end', 'param_start', 'param_time', 'path', 'pod', 'pod_template_hash', 'query_wall_time_seconds', 'stream', 'traceID', 'tsNs', 'labels', 'Line', 'Time'], + local hiddenFields = ['caller', 'cluster', 'container', 'host', 'id', 'job', 'level', 'line', 'method', 'msg', 'name', 'namespace', 'path', 'pod', 'pod_template_hash', 'query_wall_time_seconds', 'stream', 'traceID', 'tsNs', 'labels', 'Line', 'Time'], excludeByName: { [field]: true @@ -62,7 +190,7 @@ local filename = 'mimir-slow-queries.json'; }, // Order fields. 
- local orderedFields = ['ts', 'user', 'param_query', 'Time range', 'param_step', 'response_time'], + local orderedFields = ['ts', 'user', 'length', 'param_start', 'param_end', 'param_time', 'param_step', 'param_query', 'response_time'], indexByName: { [orderedFields[i]]: i @@ -170,6 +298,29 @@ local filename = 'mimir-slow-queries.json'; }, query: defaultValue, }, + // Add a variable to configure the tenant to filter on. + { + local defaultValue = '.*', + + type: 'textbox', + name: 'user_agent', + label: 'User-Agent HTTP Header', + hide: 0, + options: [ + { + selected: true, + text: defaultValue, + value: defaultValue, + }, + ], + current: { + // Default value. + selected: true, + text: defaultValue, + value: defaultValue, + }, + query: defaultValue, + }, ], }, } + { diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/tenants.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/tenants.libsonnet index 3f3f8415..df9f6a30 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/tenants.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/tenants.libsonnet @@ -620,6 +620,58 @@ local filename = 'mimir-tenants.json'; ) ) + .addRow( + $.row('Alertmanager') + .addPanel( + $.panel('Alerts') + + $.queryPanel( + [ + 'sum by (user) (cortex_alertmanager_alerts{%(job)s, user="$user"})' % { job: $.jobMatcher($._config.job_names.alertmanager) }, + 'sum by (user) (cortex_alertmanager_silences{%(job)s, user="$user"})' % { job: $.jobMatcher($._config.job_names.alertmanager) }, + ], + ['alerts', 'silences'] + ) + ) + .addPanel( + $.panel('NPS') + + $.successFailurePanel( + ||| + ( + sum(rate(cortex_alertmanager_notifications_total{%(job)s, user="$user"}[$__rate_interval])) + - + on() (sum(rate(cortex_alertmanager_notifications_failed_total{%(job)s, 
user="$user"}[$__rate_interval])) or on () vector(0)) + ) > 0 + ||| % { + job: $.jobMatcher($._config.job_names.alertmanager), + }, + 'sum(rate(cortex_alertmanager_notifications_failed_total{%(job)s, user="$user"}[$__rate_interval]))' % { + job: $.jobMatcher($._config.job_names.alertmanager), + }, + ) + ) + .addPanel( + $.panel('NPS by integration') + + $.queryPanel( + [ + ||| + ( + sum(rate(cortex_alertmanager_notifications_total{%(job)s, user="$user"}[$__rate_interval])) by(integration) + - + (sum(rate(cortex_alertmanager_notifications_failed_total{%(job)s, user="$user"}[$__rate_interval])) by(integration) or + (sum(rate(cortex_alertmanager_notifications_total{%(job)s, user="$user"}[$__rate_interval])) by(integration) * 0) + )) > 0 + ||| % { + job: $.jobMatcher($._config.job_names.alertmanager), + }, + 'sum(rate(cortex_alertmanager_notifications_failed_total{%(job)s, user="$user"}[$__rate_interval])) by(integration)' % { + job: $.jobMatcher($._config.job_names.alertmanager), + }, + ], + ['success - {{ integration }}', 'failed - {{ integration }}'] + ) + ) + ) + .addRow( $.row('Read Path - Queries (User)') .addPanel( diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/writes.libsonnet b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/writes.libsonnet index 2fb7773b..58041cf4 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/writes.libsonnet +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/writes.libsonnet @@ -124,7 +124,31 @@ local filename = 'mimir-writes.json'; $.row('Distributor') .addPanel( $.panel('Requests / sec') + - $.qpsPanel($.queries.distributor.writeRequestsPerSecond) + $.panelDescription( + 'Requests / sec', + ||| + The rate of successful, failed and rejected requests to distributor. 
+ Rejected requests are requests that distributor fails to handle because of distributor instance limits. + When distributor is configured to use "early" request rejection, then rejected requests are NOT included in other metrics. + When distributor is not configured to use "early" request rejection, then rejected requests are also counted as "errors". + ||| + ) + + $.qpsPanel($.queries.distributor.writeRequestsPerSecond) + + if $._config.show_rejected_requests_on_writes_dashboard then { + targets: [ + { + legendLink: null, + expr: 'sum (rate(cortex_distributor_instance_rejected_requests_total{%s}[$__rate_interval]))' % [$.jobMatcher($._config.job_names.distributor)], + format: 'time_series', + intervalFactor: 2, + legendFormat: 'rejected', + refId: 'B', + }, + ] + super.targets, + aliasColors+: { + rejected: '#EAB839', + }, + } else {}, ) .addPanel( $.panel('Latency') + @@ -146,7 +170,7 @@ local filename = 'mimir-writes.json'; 'Requests / sec', ||| The rate of successful, failed and rejected requests to ingester. - Rejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests and ingester-max-ingestion-rate). + Rejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate). When ingester is configured to use "early" request rejection, then rejected requests are NOT included in other metrics. When ingester is not configured to use "early" request rejection, then rejected requests are also counted as "errors". 
||| diff --git a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/jsonnetfile.lock.json b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/jsonnetfile.lock.json index fbb6838e..466f1217 100644 --- a/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/jsonnetfile.lock.json +++ b/monitoring-mixins/loki-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/jsonnetfile.lock.json @@ -8,8 +8,8 @@ "subdir": "grafana-builder" } }, - "version": "a993c3a51e2c5d7f4ad9014545103b3f919435b5", - "sum": "xEFMv4+ObwP5L1Wu0XK5agWci4AJzNApys6iKAQxLlQ=" + "version": "cbcc611b638d7172bbb4ce06edda2c8e27d4c9ca", + "sum": "aCN8uCrs2PDLR0SzRAuwZ6C5hiKt1KggCUCT7/F8yZ0=" }, { "source": { diff --git a/monitoring-mixins/mimir-mixin/deploy/alerts.yaml b/monitoring-mixins/mimir-mixin/deploy/alerts.yaml index f3aee699..02da208d 100644 --- a/monitoring-mixins/mimir-mixin/deploy/alerts.yaml +++ b/monitoring-mixins/mimir-mixin/deploy/alerts.yaml @@ -197,6 +197,15 @@ groups: for: 5m labels: severity: warning + - alert: MimirStoreGatewayTooManyFailedOperations + annotations: + message: Mimir store-gateway {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ $value | humanizePercentage }} errors while doing {{ $labels.operation }} on the object storage. 
+ runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirstoregatewaytoomanyfailedoperations + expr: | + sum by(cluster, namespace, operation) (rate(thanos_objstore_bucket_operation_failures_total{component="store-gateway"}[1m])) > 0 + for: 5m + labels: + severity: warning - alert: MimirRingMembersMismatch annotations: message: | @@ -476,7 +485,7 @@ groups: expr: | max by (cluster, namespace) (memberlist_client_cluster_members_count) > - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) + 10) + (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor.*|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) + 10) for: 20m labels: severity: warning @@ -487,7 +496,7 @@ groups: expr: | min by (cluster, namespace) (memberlist_client_cluster_members_count) < - (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) * 0.5) + (sum by (cluster, namespace) (up{job=~".+/(admin-api|alertmanager|compactor.*|distributor.*|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir|mimir-write.*|mimir-read.*|mimir-backend.*)"}) * 0.5) for: 20m labels: severity: warning diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-alertmanager-resources.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-alertmanager-resources.json index 8b774f44..a28d94d5 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-alertmanager-resources.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-alertmanager-resources.json @@ -732,7 +732,7 @@ "value": "default" }, "hide": 0, - "label": "Data 
Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-alertmanager.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-alertmanager.json index 0d3b55a6..3dca3835 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-alertmanager.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-alertmanager.json @@ -2554,7 +2554,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-compactor-resources.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-compactor-resources.json index b97bfe04..0cc92356 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-compactor-resources.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-compactor-resources.json @@ -831,7 +831,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-compactor.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-compactor.json index 6dd67b2d..f8a7710b 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-compactor.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-compactor.json @@ -2140,7 +2140,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-config.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-config.json index d8710f62..24c0a647 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-config.json +++ 
b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-config.json @@ -216,7 +216,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-object-store.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-object-store.json index 0228aa11..c873dc5e 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-object-store.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-object-store.json @@ -822,7 +822,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overrides.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overrides.json index 269996b0..c832ac2b 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overrides.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overrides.json @@ -155,7 +155,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overview-networking.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overview-networking.json index 99c88ea0..e9681904 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overview-networking.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overview-networking.json @@ -1022,7 +1022,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overview-resources.json 
b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overview-resources.json index 92a64f30..db978baf 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overview-resources.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overview-resources.json @@ -1208,7 +1208,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overview.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overview.json index adc5a987..5a43b801 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overview.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-overview.json @@ -81,7 +81,7 @@ "uid": "$datasource" }, "exemplar": false, - "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n 
vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", "instant": false, "legendFormat": "Writes", "range": true @@ -215,7 +215,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -289,21 +289,21 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", 
"legendFormat": "99th percentile", "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B", "step": 10 }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", "format": "time_series", "legendFormat": "Average", "refId": "C", @@ -378,13 +378,13 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "samples / sec", "legendLink": null }, { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "exemplars / sec", "legendLink": null @@ -1355,7 +1355,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-queries.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-queries.json index ee6f9aac..b55c8b66 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-queries.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-queries.json @@ -3043,7 +3043,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-reads-networking.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-reads-networking.json index b11cc688..e5a13c87 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-reads-networking.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-reads-networking.json @@ -2000,7 +2000,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": 
"Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-reads-resources.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-reads-resources.json index c18f9b82..f68d5fe5 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-reads-resources.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-reads-resources.json @@ -2510,7 +2510,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-reads.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-reads.json index c6ff6642..1c869013 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-reads.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-reads.json @@ -4251,7 +4251,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-remote-ruler-reads-resources.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-remote-ruler-reads-resources.json index 523b5894..0ef7537c 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-remote-ruler-reads-resources.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-remote-ruler-reads-resources.json @@ -890,7 +890,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-remote-ruler-reads.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-remote-ruler-reads.json index afa77344..a6ce4ee1 100644 --- 
a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-remote-ruler-reads.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-remote-ruler-reads.json @@ -805,7 +805,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-rollout-progress.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-rollout-progress.json index d954aa1d..1319cd22 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-rollout-progress.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-rollout-progress.json @@ -242,7 +242,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -354,7 +354,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", 
route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -462,7 +462,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -574,7 +574,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", "format": null, "instant": false, "interval": "", @@ -1253,12 +1253,15 @@ "type": "table" }, { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": "$datasource", - "fill": 1, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "percentunit" + } + }, "gridPos": { "h": 8, "w": 8, @@ -1266,31 +1269,19 @@ "y": 8 }, "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, "targets": [ { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) 
(cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", "format": "time_series", "legendFormat": "writes", "legendLink": null @@ -1302,41 +1293,8 @@ "legendLink": null } ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, "title": "Latency vs 24h ago", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "type": "timeseries" } ], "refresh": "10s", @@ -1354,7 +1312,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-ruler.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-ruler.json index 5d7d6818..d88c1b51 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-ruler.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-ruler.json @@ -2460,7 +2460,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-scaling.json 
b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-scaling.json index 72c841b1..f31e49a4 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-scaling.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-scaling.json @@ -267,7 +267,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-slow-queries.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-slow-queries.json index 0ae3dd5b..167237bc 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-slow-queries.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-slow-queries.json @@ -30,6 +30,1182 @@ ], "refresh": "", "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != 
\"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Response time", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap 
fetched_series_count[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Fetched series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "thresholds": 
[ ], + "timeFrom": null, + "timeShift": null, + "title": "Fetched chunks", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Response size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Time span", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + 
"values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Across tenants", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 response time", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, +
"bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 fetched series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + 
"stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 fetched chunks", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": 
null, + "timeShift": null, + "title": "P99 response size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 time span", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } 
+ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 tenants", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 response time", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + 
"values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 fetched series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | 
logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 fetched chunks", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 response size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + 
"mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 time span", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 User-Agents", + "titleSize": "h6" + }, { "collapse": false, 
"height": "250px", @@ -77,11 +1253,12 @@ } ] }, - "id": 1, + "height": "500px", + "id": 16, "span": 12, "targets": [ { - "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | response_time > ${min_duration}", + "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration}", "instant": false, "legendFormat": "", "range": true, @@ -96,23 +1273,6 @@ "source": "labels" } }, - { - "id": "calculateField", - "options": { - "alias": "Time range", - "binary": { - "left": "param_end", - "operator": "-", - "reducer": "sum", - "right": "param_start" - }, - "mode": "binary", - "reduce": { - "reducer": "sum" - }, - "replaceFields": false - } - }, { "id": "organize", "options": { @@ -132,9 +1292,6 @@ "msg": true, "name": true, "namespace": true, - "param_end": true, - "param_start": true, - "param_time": true, "path": true, "pod": true, "pod_template_hash": true, @@ -144,10 +1301,13 @@ "tsNs": true }, "indexByName": { - "Time range": 3, - "param_query": 2, - "param_step": 4, - "response_time": 5, + "length": 2, + "param_end": 4, + "param_query": 7, + "param_start": 3, + "param_step": 6, + "param_time": 5, + "response_time": 8, "ts": 0, "user": 1 }, @@ -184,7 +1344,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -284,6 +1444,25 @@ ], "query": ".*", "type": "textbox" + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "label": "User-Agent HTTP Header", + "name": "user_agent", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" } ] }, diff --git 
a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-tenants.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-tenants.json index b7504de5..0a661efa 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-tenants.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-tenants.json @@ -96,7 +96,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "in-memory", "legendLink": null @@ -108,13 +108,13 @@ "legendLink": null }, { - "expr": "sum(\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cortex_ingester_active_series{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "active", "legendLink": null }, { - "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n) > 0\n", + "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", "format": "time_series", "legendFormat": "active ({{ name }})", "legendLink": null @@ -195,13 +195,13 @@ "steppedLine": false, "targets": [ { - "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "active", "legendLink": null }, { - "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n) > 0\n", + "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", "format": "time_series", "legendFormat": "active ({{ name }})", "legendLink": null @@ -282,13 +282,13 @@ "steppedLine": false, "targets": [ { - "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "buckets", "legendLink": null }, { - "expr": "sum by (name) (\n 
cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n) > 0\n", + "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", "format": "time_series", "legendFormat": "buckets ({{ name }})", "legendLink": null @@ -369,7 +369,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "series", "legendLink": null @@ -438,7 +438,7 @@ "steppedLine": false, "targets": [ { - "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", 
user=\"$user\"} > 0)", + "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", "format": "time_series", "legendFormat": "age", "legendLink": null @@ -588,7 +588,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -669,7 +669,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -750,7 +750,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{ reason }}", "legendLink": null @@ -831,7 +831,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -912,7 +912,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -993,13 +993,13 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "deduplicated", "legendLink": null }, { - "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "non-HA", "legendLink": null @@ -1074,7 +1074,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{ reason }} (distributor)", "legendLink": null @@ -1161,7 +1161,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -1230,7 +1230,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -1305,7 +1305,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{ reason }}", "legendLink": null @@ -1374,7 +1374,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(\n 
rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -2317,6 +2317,261 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (user) (cortex_alertmanager_alerts{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "alerts", + "legendLink": null + }, + { + "expr": "sum by (user) (cortex_alertmanager_silences{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "silences", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Alerts", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": 
null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "failed": "#E24D42", + "successful": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 30, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))\n-\non() (sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) or on () vector(0))\n) > 0\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "NPS", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, 
+ "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)\n-\n(sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) or\n (sum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) * 0)\n)) > 0\n", + "format": "time_series", + "legendFormat": "success - {{ integration }}", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)", + "format": "time_series", + "legendFormat": "failed - {{ integration }}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "NPS by integration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": 
true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, @@ -2371,7 +2626,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 30, + "id": 33, "legend": { "avg": false, "current": false, @@ -2457,7 +2712,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 31, + "id": 34, "legend": { "avg": false, "current": false, @@ -2531,7 +2786,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 32, + "id": 35, "legend": { "avg": false, "current": false, @@ -2620,7 +2875,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-top-tenants.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-top-tenants.json index ee179d50..1a1cc2eb 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-top-tenants.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-top-tenants.json @@ -127,7 +127,7 @@ ], "targets": [ { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n )\n)\n", + "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", "format": "table", "instant": true, "legendFormat": "", @@ -253,7 +253,7 @@ ], "targets": [ { - "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"} )\n)\n)", + "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n)", "format": "table", "instant": true, "legendFormat": "", @@ 
-341,7 +341,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", + "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -465,7 +465,7 @@ ], "targets": [ { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[5m])))", + "expr": "topk($limit, sum 
by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", "format": "table", "instant": true, "legendFormat": "", @@ -553,7 +553,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", + "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -677,7 +677,7 @@ ], "targets": [ { - "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor|cortex|mimir|mimir-write.*))\"}[5m])))", + "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", "format": "table", "instant": 
true, "legendFormat": "", @@ -765,7 +765,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", + "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -889,7 +889,7 @@ ], "targets": [ { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n )\n)\n", + "expr": "topk($limit,\n sum by (user) (\n 
cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", "format": "table", "instant": true, "legendFormat": "", @@ -1015,7 +1015,7 @@ ], "targets": [ { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[5m])))", + "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", "format": "table", "instant": true, "legendFormat": "", @@ -1333,7 +1333,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-writes-networking.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-writes-networking.json index 4c256035..3e00f6a0 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-writes-networking.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-writes-networking.json @@ -1022,7 +1022,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-writes-resources.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-writes-resources.json index 335e2f0d..71c6ab33 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-writes-resources.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-writes-resources.json @@ -1257,7 
+1257,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-writes.json b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-writes.json index 080d04cf..89aa76f4 100644 --- a/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-writes.json +++ b/monitoring-mixins/mimir-mixin/deploy/dashboards_out/mimir-writes.json @@ -90,7 +90,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "instant": true, "refId": "A" @@ -166,7 +166,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "instant": true, "refId": "A" @@ -242,7 +242,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}))\n", + "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, 
namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", "format": "time_series", "instant": true, "refId": "A" @@ -318,7 +318,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}))\n", + "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", "format": "time_series", "instant": true, "refId": "A" @@ -490,7 +490,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ 
-564,21 +564,21 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "99th percentile", "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B", "step": 10 }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", + "expr": "1e3 * 
sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", "format": "time_series", "legendFormat": "Average", "refId": "C", @@ -661,7 +661,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", "format": "time_series", "legendFormat": "", "legendLink": null @@ -959,7 +959,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", 
\"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1033,19 +1033,19 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 
1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1141,7 +1141,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1215,19 +1215,19 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", 
kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -2157,7 +2157,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "incoming exemplars", "legendLink": null @@ -2232,7 +2232,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "received exemplars", "legendLink": null @@ -2307,7 +2307,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "ingested exemplars", "legendLink": null @@ -2382,7 +2382,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "appended exemplars", "legendLink": null @@ -2468,7 +2468,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{reason}}", "legendLink": null @@ -2606,7 +2606,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/deploy/manifests/k8s-all-in-one.yaml b/monitoring-mixins/mimir-mixin/deploy/manifests/k8s-all-in-one.yaml index 8fe26a29..457519e9 100644 --- a/monitoring-mixins/mimir-mixin/deploy/manifests/k8s-all-in-one.yaml +++ b/monitoring-mixins/mimir-mixin/deploy/manifests/k8s-all-in-one.yaml @@ -735,7 +735,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -3393,7 +3393,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -4330,7 +4330,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -6576,7 +6576,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -6898,7 +6898,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": 
"datasource", "options": [ ], "query": "prometheus", @@ -7826,7 +7826,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -8087,7 +8087,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -9232,7 +9232,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -10544,7 +10544,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -10729,7 +10729,7 @@ data: "uid": "$datasource" }, "exemplar": false, - "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", "instant": false, "legendFormat": "Writes", "range": true @@ -10863,7 +10863,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -10937,21 +10937,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "99th percentile", "refId": "A", "step": 10 }, { - "expr": 
"histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B", "step": 10 }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", "format": "time_series", "legendFormat": "Average", "refId": "C", @@ -11026,13 +11026,13 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "samples / sec", "legendLink": null }, { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "exemplars / sec", "legendLink": null @@ -12003,7 +12003,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -15152,7 +15152,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -17258,7 +17258,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -19872,7 +19872,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -24227,7 +24227,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -25223,7 +25223,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -26132,7 +26132,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -26480,7 
+26480,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -26592,7 +26592,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", "format": null, "instant": false, 
"interval": "", @@ -26700,7 +26700,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -26812,7 +26812,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", "format": null, "instant": false, "interval": "", @@ -27491,12 +27491,15 @@ data: "type": "table" }, { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": "$datasource", - "fill": 1, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "percentunit" + } + }, "gridPos": { "h": 8, "w": 8, @@ -27504,31 +27507,19 @@ 
data: "y": 8 }, "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, "targets": [ { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", "format": "time_series", "legendFormat": "writes", "legendLink": null @@ -27540,41 +27531,8 @@ data: "legendLink": null } ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, "title": "Latency vs 24h ago", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - 
}, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "type": "timeseries" } ], "refresh": "10s", @@ -27592,7 +27550,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -30156,7 +30114,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -30529,7 +30487,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -30670,194 +30628,1354 @@ data: "height": "250px", "panels": [ { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "${lokidatasource}", - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Time range" - }, - "properties": [ - { - "id": "mappings", - "value": [ - { - "from": "", - "id": 1, - "text": "Instant query", - "to": "", - "type": 1, - "value": "0" - } - ] - }, - { - "id": "unit", - "value": "s" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Step" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - } - ] - }, + "fill": 1, "id": 1, - "span": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + 
"steppedLine": false, "targets": [ { - "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | response_time > ${min_duration}", - "instant": false, - "legendFormat": "", - "range": true, - "refId": "A" + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null } ], - "title": "Slow queries", - "transformations": [ + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Response time", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ { - "id": "extractFields", - "options": { - "source": "labels" - } + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "id": "calculateField", - "options": { - "alias": "Time range", - "binary": { - "left": "param_end", - "operator": "-", - "reducer": "sum", - "right": "param_start" - }, - "mode": "binary", - "reduce": { - "reducer": "sum" - }, - "replaceFields": false - } + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + 
"aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null }, { - "id": "organize", - "options": { - "excludeByName": { - "Line": true, - "Time": true, - "caller": true, - "cluster": true, - "container": true, - "host": true, - "id": true, - "job": true, - "labels": true, - "level": true, - "line": true, - "method": true, - "msg": true, - "name": true, - "namespace": true, - "param_end": true, - "param_start": true, - "param_time": true, - "path": true, - "pod": true, - "pod_template_hash": true, - "query_wall_time_seconds": true, - "stream": true, - "traceID": true, - "tsNs": true - }, - "indexByName": { - "Time range": 3, - "param_query": 2, - "param_step": 4, - "response_time": 5, - "ts": 0, - "user": 1 - }, - "renameByName": { - "org_id": "Tenant ID", - "param_query": "Query", - "param_step": "Step", - "response_time": "Duration" - } - } + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap 
fetched_series_count[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null } ], - "type": "table" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "mimir" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Fetched series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, - "hide": 0, - "label": "Data Source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".*", - "current": { - "text": "prod", - "value": "prod" + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | 
response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Fetched chunks", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(cortex_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, 
+ "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Response size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, - "datasource": "$datasource", - "hide": 0, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Time span", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Accross tenants", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 
2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 response time", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], 
+ "timeFrom": null, + "timeShift": null, + "title": "P99 fetched series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 fetched chunks", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": 
false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 response size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ 
], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 time span", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 tenants", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | 
user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 response time", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 fetched series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + 
"mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 fetched chunks", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + 
"current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 response size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${lokidatasource}", + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, 
{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 time span", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 User-Agents", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${lokidatasource}", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time range" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "from": "", + "id": 1, + "text": "Instant query", + "to": "", + "type": 1, + "value": "0" + } + ] + }, + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Step" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + } + ] + }, + "height": "500px", + "id": 16, + "span": 12, + "targets": [ + { + "expr": "{cluster=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration}", + "instant": false, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Slow 
queries", + "transformations": [ + { + "id": "extractFields", + "options": { + "source": "labels" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Line": true, + "Time": true, + "caller": true, + "cluster": true, + "container": true, + "host": true, + "id": true, + "job": true, + "labels": true, + "level": true, + "line": true, + "method": true, + "msg": true, + "name": true, + "namespace": true, + "path": true, + "pod": true, + "pod_template_hash": true, + "query_wall_time_seconds": true, + "stream": true, + "traceID": true, + "tsNs": true + }, + "indexByName": { + "length": 2, + "param_end": 4, + "param_query": 7, + "param_start": 3, + "param_step": 6, + "param_time": 5, + "response_time": 8, + "ts": 0, + "user": 1 + }, + "renameByName": { + "org_id": "Tenant ID", + "param_query": "Query", + "param_step": "Step", + "response_time": "Duration" + } + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(cortex_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, "includeAll": false, "label": "namespace", "multi": false, @@ 
-30919,6 +32037,25 @@ data: ], "query": ".*", "type": "textbox" + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "label": "User-Agent HTTP Header", + "name": "user_agent", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" } ] }, @@ -31066,7 +32203,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "in-memory", "legendLink": null @@ -31078,13 +32215,13 @@ data: "legendLink": null }, { - "expr": "sum(\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n 
cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "active", "legendLink": null }, { - "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n) > 0\n", + "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", "format": "time_series", "legendFormat": "active ({{ name }})", "legendLink": null @@ -31165,13 +32302,13 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) 
(cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "active", "legendLink": null }, { - "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n) > 0\n", + "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_series_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", "format": "time_series", "legendFormat": "active ({{ name }})", "legendLink": null @@ -31252,13 +32389,13 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "buckets", "legendLink": null }, { 
- "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n) > 0\n", + "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", "format": "time_series", "legendFormat": "buckets ({{ name }})", "legendLink": null @@ -31339,7 +32476,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "series", "legendLink": null @@ -31408,7 +32545,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", + "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", "format": "time_series", "legendFormat": "age", "legendLink": null @@ -31558,7 +32695,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_requests_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -31639,7 +32776,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_received_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -31720,7 +32857,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{ reason }}", "legendLink": null @@ -31801,7 +32938,7 @@ data: "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_samples_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -31882,7 +33019,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -31963,13 +33100,13 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "deduplicated", "legendLink": null }, { - "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "non-HA", "legendLink": null @@ -32044,7 +33181,7 @@ data: "steppedLine": false, "targets": [ { 
- "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{ reason }} (distributor)", "legendLink": null @@ -32131,7 +33268,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -32200,7 +33337,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -32275,7 +33412,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{ 
reason }}", "legendLink": null @@ -32344,7 +33481,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "rate", "legendLink": null @@ -32783,7 +33920,333 @@ data: "show": false } ] - }, + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) > 0", + "format": "time_series", + "legendFormat": "{{ rule_group }}", + "legendLink": null + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Failed 
evaluations rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rules", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "rules", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit biggest groups", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "seconds", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by 
(rule_group) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit slowest groups (last evaluation)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top rules", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { }, "bars": false, @@ -32791,15 +34254,9 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 24, + "id": 27, "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "show": false }, "lines": true, "linewidth": 1, @@ -32811,21 +34268,21 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 3, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by (rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) > 0", + "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", "format": "time_series", - 
"legendFormat": "{{ rule_group }}", + "legendFormat": "rate", "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Failed evaluations rate", + "title": "Sent notifications rate", "tooltip": { "shared": false, "sort": 0, @@ -32857,35 +34314,19 @@ data: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Rules", - "titleSize": "h6" - }, - { - "collapse": true, - "height": "250px", - "panels": [ + }, { - "aliasColors": { }, + "aliasColors": { + "rate": "#E24D42" + }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 25, + "id": 28, "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "show": false }, "lines": true, "linewidth": 1, @@ -32896,68 +34337,28 @@ data: "points": false, "renderer": "flot", "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, "spaceLength": 10, "span": 6, "stack": false, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "rules", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], "targets": [ { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" + "expr": 
"sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Top $limit biggest groups", + "title": "Failed notifications rate", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -32983,7 +34384,19 @@ data: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Notifications", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { }, "bars": false, @@ -32991,7 +34404,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 26, + "id": 29, "legend": { "avg": false, "current": false, @@ -33010,68 +34423,34 @@ data: "points": false, "renderer": "flot", "seriesOverrides": [ ], - "sort": { - "col": 2, - "desc": true - }, "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "seconds", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "number", - "unit": "short" + "expr": "sum by (user) (cortex_alertmanager_alerts{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "alerts", + "legendLink": null }, { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": 
"YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_last_duration_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" + "expr": "sum by (user) (cortex_alertmanager_silences{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "silences", + "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Top $limit slowest groups (last evaluation)", + "title": "Alerts", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -33097,29 +34476,26 @@ data: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Top rules", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { - "aliasColors": { }, + "aliasColors": { + "failed": "#E24D42", + "successful": "#7EB26D" + }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 27, + "id": 30, "legend": { - "show": false + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, "lines": true, "linewidth": 1, @@ -33131,21 +34507,27 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
user=\"$user\"}[$__rate_interval]))", + "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))\n-\non() (sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) or on () vector(0))\n) > 0\n", "format": "time_series", - "legendFormat": "rate", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Sent notifications rate", + "title": "NPS", "tooltip": { "shared": false, "sort": 0, @@ -33179,17 +34561,21 @@ data: ] }, { - "aliasColors": { - "rate": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 28, + "id": 31, "legend": { - "show": false + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, "lines": true, "linewidth": 1, @@ -33201,21 +34587,27 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) 
by(integration)\n-\n(sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) or\n (sum(rate(cortex_alertmanager_notifications_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) * 0)\n)) > 0\n", "format": "time_series", - "legendFormat": "rate", + "legendFormat": "success - {{ integration }}", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)", + "format": "time_series", + "legendFormat": "failed - {{ integration }}", "legendLink": null } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Failed notifications rate", + "title": "NPS by integration", "tooltip": { "shared": false, "sort": 0, @@ -33253,7 +34645,7 @@ data: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Notifications", + "title": "Alertmanager", "titleSize": "h6" }, { @@ -33267,7 +34659,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 29, + "id": 32, "legend": { "avg": false, "current": false, @@ -33341,7 +34733,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 30, + "id": 33, "legend": { "avg": false, "current": false, @@ -33427,7 +34819,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 31, + "id": 34, "legend": { "avg": false, "current": false, @@ -33501,7 +34893,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 32, + "id": 35, "legend": { "avg": false, "current": false, @@ -33590,7 +34982,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], 
"query": "prometheus", @@ -33885,7 +35277,7 @@ data: ], "targets": [ { - "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n )\n)\n", + "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", "format": "table", "instant": true, "legendFormat": "", @@ -34011,7 +35403,7 @@ data: ], "targets": [ { - "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"} )\n)\n)", + "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) 
(cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n)", "format": "table", "instant": true, "legendFormat": "", @@ -34099,7 +35491,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", + "expr": "sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_created_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster, namespace) (cortex_ingester_memory_series_removed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -34223,7 +35615,7 @@ 
data: ], "targets": [ { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[5m])))", + "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", "format": "table", "instant": true, "legendFormat": "", @@ -34311,7 +35703,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", + "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -34435,7 +35827,7 @@ data: ], "targets": [ { - "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor|cortex|mimir|mimir-write.*))\"}[5m])))", + "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", "format": "table", "instant": true, "legendFormat": "", @@ -34523,7 +35915,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", + "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -34647,7 +36039,7 @@ data: ], "targets": [ { - "expr": "topk($limit,\n sum by (user) (\n 
cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n )\n)\n", + "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", "format": "table", "instant": true, "legendFormat": "", @@ -34773,7 +36165,7 @@ data: ], "targets": [ { - "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[5m])))", + "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", "format": "table", "instant": true, "legendFormat": "", @@ -35091,7 +36483,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -36248,7 +37640,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -37609,7 +39001,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -37803,7 +39195,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "instant": true, "refId": "A" @@ -37879,7 +39271,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "instant": true, "refId": "A" @@ -37955,7 +39347,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}))\n", + "expr": "sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", "format": "time_series", "instant": true, "refId": "A" @@ -38031,7 +39423,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}))\n", + "expr": 
"sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", "format": "time_series", "instant": true, "refId": "A" @@ -38203,7 +39595,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -38277,21 +39669,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", 
route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "99th percentile", "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B", "step": 10 }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", + "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", "format": "time_series", "legendFormat": "Average", "refId": "C", @@ -38374,7 
+39766,7 @@ data: "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", "format": "time_series", "legendFormat": "", "legendLink": null @@ -38672,7 +40064,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -38746,19 +40138,19 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, 
sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -38854,7 +40246,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -38928,19 +40320,19 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": 
"sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -39870,7 +41262,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "incoming exemplars", "legendLink": null @@ -39945,7 +41337,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "received exemplars", "legendLink": null @@ -40020,7 +41412,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n 
cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "ingested exemplars", "legendLink": null @@ -40095,7 +41487,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "appended exemplars", "legendLink": null @@ -40181,7 +41573,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{reason}}", "legendLink": null @@ -40319,7 +41711,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/mimir-mixin/jsonnetfile.lock.json b/monitoring-mixins/mimir-mixin/jsonnetfile.lock.json index 05e6b0c6..a882010c 100644 --- a/monitoring-mixins/mimir-mixin/jsonnetfile.lock.json +++ b/monitoring-mixins/mimir-mixin/jsonnetfile.lock.json @@ -8,8 +8,8 @@ "subdir": "grafana-builder" } }, - "version": "eb731883044fc58f255d79c2a8d78a5854084e05", - "sum": "VmOxvg9FuY9UYr3lN6ZJe2HhuIErJoWimPybQr3S3yQ=" + "version": "02db06f540086fa3f67d487bd01e1b314853fb8f", + "sum": "B49EzIY2WZsFxNMJcgRxE/gcZ9ltnS8pkOOV6Q5qioc=" }, { "source": { @@ -18,7 +18,7 @@ "subdir": "mixin-utils" } }, - "version": "eb731883044fc58f255d79c2a8d78a5854084e05", + "version": "02db06f540086fa3f67d487bd01e1b314853fb8f", "sum": "PGf+vyCHqGxxS6SKNZiN3vR1xPnw6VOESXbeJrA5FaA=" }, { @@ -28,8 +28,8 @@ "subdir": "operations/mimir-mixin" } }, - "version": "874ea9c79f8ba7bf0250a55b94a9678b1b966af2", - "sum": "OdrJU9cIfAmYDdQIsDLXrqhNND8/s34w3OYX9Jc61ns=" + "version": "297e905ce8bb86382b5c50f9abbcf335e2b22244", + "sum": "TBKoXovJMzJ5M6vCrj1zser5ljWYuYXNy/IoUOZ3S/c=" } ], "legacyImports": false diff --git a/monitoring-mixins/mimir-mixin/mixin.libsonnet b/monitoring-mixins/mimir-mixin/mixin.libsonnet index d98a3814..aa9f0f30 100644 --- a/monitoring-mixins/mimir-mixin/mixin.libsonnet +++ b/monitoring-mixins/mimir-mixin/mixin.libsonnet @@ -1,661 +1 @@ -local mixin = import 'mimir-mixin/mixin.libsonnet'; - -mixin { - grafanaDashboardFolder: 'Mimir', - 
grafanaDashboardShards: 4, - - _config+:: { - // The product name used when building dashboards. - product: 'Mimir', - - // The prefix including product name used when building dashboards. - dashboard_prefix: '%(product)s / ' % $._config.product, - // Controls tooltip and hover highlight behavior across different panels - // 0: Default, the cross hair will appear on only one panel - // 1: Shared crosshair, the crosshair will appear on all panels but the - // tooltip will appear only on the panel under the cursor - // 2: Shared Tooltip, both crosshair and tooltip will appear on all panels - graph_tooltip: 1, - - // Tags for dashboards. - tags: ['mimir'], - - // If Mimir is deployed as a single binary, set to true to - // modify the job selectors in the dashboard queries. - singleBinary: false, - - // This is mapping between a Mimir component name and the regular expression that should be used - // to match its instance and container name. Mimir jsonnet and Helm guarantee that the instance name - // (e.g. Kubernetes Deployment) and container name always match, so it's safe to use a shared mapping. - // - // This mapping is intentionally local and can't be overridden. If the final user needs to customize - // dashboards and alerts, they should override the final matcher regexp (e.g. container_names or instance_names). - local componentNameRegexp = { - // Microservices deployment mode. The following matchers MUST match only - // the instance when deployed in microservices mode (e.g. "distributor" - // matcher shouldn't match "mimir-write" too). 
- compactor: 'compactor', - alertmanager: 'alertmanager', - alertmanager_im: 'alertmanager-im', - ingester: 'ingester', - distributor: 'distributor', - querier: 'querier', - query_frontend: 'query-frontend', - query_scheduler: 'query-scheduler', - ruler: 'ruler', - ruler_querier: 'ruler-querier', - ruler_query_frontend: 'ruler-query-frontend', - ruler_query_scheduler: 'ruler-query-scheduler', - store_gateway: 'store-gateway', - overrides_exporter: 'overrides-exporter', - gateway: '(gateway|cortex-gw|cortex-gw-internal)', - - // Read-write deployment mode. The following matchers MUST match only - // the instance when deployed in read-write deployment mode (e.g. "mimir-write" - // matcher shouldn't match "distributor" too). - mimir_write: 'mimir-write', - mimir_read: 'mimir-read', - mimir_backend: 'mimir-backend', - }, - - // Some dashboards show panels grouping together multiple components of a given "path". - // This mapping configures which components belong to each group. - local componentGroups = { - write: ['distributor', 'ingester', 'mimir_write'], - read: ['query_frontend', 'querier', 'ruler_query_frontend', 'ruler_querier', 'mimir_read'], - backend: ['query_scheduler', 'ruler_query_scheduler', 'ruler', 'store_gateway', 'compactor', 'alertmanager', 'overrides_exporter', 'mimir_backend'], - }, - - // These are used by the dashboards and allow for the simultaneous display of - // microservice and single binary Mimir clusters. - // Whenever you do any change here, please reflect it in the doc at: - // docs/sources/mimir/manage/monitoring-grafana-mimir/requirements.md - job_names: { - ingester: ['ingester.*', 'cortex', 'mimir', 'mimir-write.*'], // Match also custom and per-zone ingester deployments. - distributor: ['distributor', 'cortex', 'mimir', 'mimir-write.*'], - querier: ['querier.*', 'cortex', 'mimir', 'mimir-read.*'], // Match also custom querier deployments. - ruler_querier: ['ruler-querier.*'], // Match also custom querier deployments. 
- ruler: ['ruler', 'cortex', 'mimir', 'mimir-backend.*'], - query_frontend: ['query-frontend.*', 'cortex', 'mimir', 'mimir-read.*'], // Match also custom query-frontend deployments. - ruler_query_frontend: ['ruler-query-frontend.*'], // Match also custom ruler-query-frontend deployments. - query_scheduler: ['query-scheduler.*', 'mimir-backend.*'], // Not part of single-binary. Match also custom query-scheduler deployments. - ruler_query_scheduler: ['ruler-query-scheduler.*'], // Not part of single-binary. Match also custom query-scheduler deployments. - ring_members: ['admin-api', 'alertmanager', 'compactor.*', 'distributor', 'ingester.*', 'querier.*', 'ruler', 'ruler-querier.*', 'store-gateway.*', 'cortex', 'mimir', 'mimir-write.*', 'mimir-read.*', 'mimir-backend.*'], - store_gateway: ['store-gateway.*', 'cortex', 'mimir', 'mimir-backend.*'], // Match also per-zone store-gateway deployments. - gateway: ['gateway', 'cortex-gw', 'cortex-gw-internal'], - compactor: ['compactor.*', 'cortex', 'mimir', 'mimir-backend.*'], // Match also custom compactor deployments. - alertmanager: ['alertmanager', 'cortex', 'mimir', 'mimir-backend.*'], - overrides_exporter: ['overrides-exporter', 'mimir-backend.*'], - - // The following are job matchers used to select all components in a given "path". - write: ['distributor', 'ingester.*', 'mimir-write.*'], - read: ['query-frontend.*', 'querier.*', 'ruler-query-frontend.*', 'ruler-querier.*', 'mimir-read.*'], - backend: ['ruler', 'query-scheduler.*', 'ruler-query-scheduler.*', 'store-gateway.*', 'compactor.*', 'alertmanager', 'overrides-exporter', 'mimir-backend.*'], - }, - - // Name selectors for different application instances, using the "per_instance_label". - instance_names: { - // Wrap the regexp into an Helm compatible matcher if the deployment type is "kubernetes". 
- local helmCompatibleMatcher = function(regexp) if $._config.deployment_type == 'kubernetes' then '(.*mimir-)?%s' % regexp else regexp, - // Wrap the regexp to match any prefix if the deployment type is "baremetal". - local baremetalCompatibleMatcher = function(regexp) if $._config.deployment_type == 'baremetal' then '.*%s' % regexp else regexp, - local instanceMatcher = function(regexp) baremetalCompatibleMatcher(helmCompatibleMatcher('%s.*' % regexp)), - - // Microservices deployment mode. The following matchers MUST match only - // the instance when deployed in microservices mode (e.g. "distributor" - // matcher shouldn't match "mimir-write" too). - compactor: instanceMatcher(componentNameRegexp.compactor), - alertmanager: instanceMatcher(componentNameRegexp.alertmanager), - alertmanager_im: instanceMatcher(componentNameRegexp.alertmanager_im), - ingester: instanceMatcher(componentNameRegexp.ingester), - distributor: instanceMatcher(componentNameRegexp.distributor), - querier: instanceMatcher(componentNameRegexp.querier), - ruler: instanceMatcher(componentNameRegexp.ruler), - ruler_query_frontend: instanceMatcher(componentNameRegexp.ruler_query_frontend), - ruler_query_scheduler: instanceMatcher(componentNameRegexp.ruler_query_scheduler), - ruler_querier: instanceMatcher(componentNameRegexp.ruler_querier), - query_frontend: instanceMatcher(componentNameRegexp.query_frontend), - query_scheduler: instanceMatcher(componentNameRegexp.query_scheduler), - store_gateway: instanceMatcher(componentNameRegexp.store_gateway), - overrides_exporter: instanceMatcher(componentNameRegexp.overrides_exporter), - gateway: instanceMatcher(componentNameRegexp.gateway), - - // Read-write deployment mode. The following matchers MUST match only - // the instance when deployed in read-write deployment mode (e.g. "mimir-write" - // matcher shouldn't match "distributor" too). 
- mimir_write: instanceMatcher(componentNameRegexp.mimir_write), - mimir_read: instanceMatcher(componentNameRegexp.mimir_read), - mimir_backend: instanceMatcher(componentNameRegexp.mimir_backend), - - // The following are instance matchers used to select all components in a given "path". - // These matchers CAN match both instances deployed in "microservices" and "read-write" mode. - local componentsGroupMatcher = function(components) - instanceMatcher('(%s)' % std.join('|', std.map(function(name) componentNameRegexp[name], components))), - - write: componentsGroupMatcher(componentGroups.write), - read: componentsGroupMatcher(componentGroups.read), - backend: componentsGroupMatcher(componentGroups.backend), - }, - all_instances: std.join('|', std.map(function(name) componentNameRegexp[name], componentGroups.write + componentGroups.read + componentGroups.backend)), - - container_names: { - // Microservices deployment mode. The following matchers MUST match only - // the instance when deployed in microservices mode (e.g. "distributor" - // matcher shouldn't match "mimir-write" too). - gateway: componentNameRegexp.gateway, - distributor: componentNameRegexp.distributor, - ingester: componentNameRegexp.ingester, - query_frontend: componentNameRegexp.query_frontend, - query_scheduler: componentNameRegexp.query_scheduler, - querier: componentNameRegexp.querier, - store_gateway: componentNameRegexp.store_gateway, - ruler: componentNameRegexp.ruler, - ruler_query_frontend: componentNameRegexp.ruler_query_frontend, - ruler_query_scheduler: componentNameRegexp.ruler_query_scheduler, - ruler_querier: componentNameRegexp.ruler_querier, - alertmanager: componentNameRegexp.alertmanager, - alertmanager_im: componentNameRegexp.alertmanager_im, - compactor: componentNameRegexp.compactor, - - // Read-write deployment mode. The following matchers MUST match only - // the container when deployed in read-write deployment mode (e.g. 
"mimir-write" - // matcher shouldn't match "distributor" too). - mimir_write: componentNameRegexp.mimir_write, - mimir_read: componentNameRegexp.mimir_read, - mimir_backend: componentNameRegexp.mimir_backend, - - // The following are container matchers used to select all components in a given "path". - // These matchers CAN match both instances deployed in "microservices" and "read-write" mode. - local componentsGroupMatcher = function(components) std.join('|', std.map(function(name) componentNameRegexp[name], components)), - - write: componentsGroupMatcher(componentGroups.write), - read: componentsGroupMatcher(componentGroups.read), - backend: componentsGroupMatcher(componentGroups.backend), - }, - - // The label used to differentiate between different Kubernetes clusters. - per_cluster_label: 'cluster', - per_namespace_label: 'namespace', - per_job_label: 'job', - - // Grouping labels, to uniquely identify and group by {jobs, clusters} - job_labels: [$._config.per_cluster_label, $._config.per_namespace_label, $._config.per_job_label], - job_prefix: '($namespace)/', - cluster_labels: [$._config.per_cluster_label, $._config.per_namespace_label], - - // PromQL queries used to find clusters and namespaces with Mimir. - dashboard_variables: { - job_query: 'cortex_build_info', // Only used if singleBinary is true. - cluster_query: 'cortex_build_info', - namespace_query: 'cortex_build_info{%s=~"$cluster"}' % $._config.per_cluster_label, - }, - - cortex_p99_latency_threshold_seconds: 2.5, - - // Whether resources dashboards are enabled (based on cAdvisor metrics). - resources_dashboards_enabled: true, - - // Whether mimir gateway is enabled - gateway_enabled: false, - - // Whether grafana cloud alertmanager instance-mapper is enabled - alertmanager_im_enabled: false, - - // The label used to differentiate between different application instances (i.e. 'pod' in a kubernetes install). 
- per_instance_label: 'pod', - - deployment_type: 'kubernetes', - // System mount point where mimir stores its data, used for baremetal - // deployment only. - instance_data_mountpoint: '/', - // Resource consumption threshold to accomodate node loss - // used for baremetal deployment only - resource_threshold: 0.66, - alertmanager_alerts: { - kubernetes: { - memory_allocation: ||| - (container_memory_working_set_bytes{container="alertmanager"} / container_spec_memory_limit_bytes{container="alertmanager"}) > %(threshold)s - and - (container_spec_memory_limit_bytes{container="alertmanager"} > 0) - |||, - }, - baremetal: { - memory_allocation: ||| - (process_resident_memory_bytes{job=~".*/alertmanager"} / on(%(per_instance_label)s) node_memory_MemTotal_bytes{}) > %(threshold)s - |||, - }, - }, - ingester_alerts: { - kubernetes: { - memory_allocation: ||| - ( - # We use RSS instead of working set memory because of the ingester's extensive usage of mmap. - # See: https://github.com/grafana/mimir/issues/2466 - container_memory_rss{container=~"(%(ingester)s|%(mimir_write)s|%(mimir_backend)s)"} - / - ( container_spec_memory_limit_bytes{container=~"(%(ingester)s|%(mimir_write)s|%(mimir_backend)s)"} > 0 ) - ) - # Match only Mimir namespaces. - * on(%(alert_aggregation_labels)s) group_left max by(%(alert_aggregation_labels)s) (cortex_build_info) - > %(threshold)s - |||, - }, - baremetal: { - memory_allocation: ||| - ( - process_resident_memory_bytes{job=~".*/(%(ingester)s|%(mimir_write)s|%(mimir_backend)s)"} - / - on(%(per_instance_label)s) node_memory_MemTotal_bytes{} - ) > %(threshold)s - |||, - }, - }, - mimir_scaling_rules: { - kubernetes: { - actual_replicas_count: - ||| - # Convenience rule to get the number of replicas for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- sum by (%(alert_aggregation_labels)s, deployment) ( - label_replace( - kube_deployment_spec_replicas, - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - or - sum by (%(alert_aggregation_labels)s, deployment) ( - label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?") - ) - |||, - cpu_usage_seconds_total: - ||| - sum by (%(alert_aggregation_labels)s, deployment) ( - label_replace( - label_replace( - sum by (%(alert_aggregation_labels)s, %(per_instance_label)s)(rate(container_cpu_usage_seconds_total[1m])), - "deployment", "$1", "%(per_instance_label)s", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - |||, - resource_requests_cpu_cores: - ||| - # Convenience rule to get the CPU request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_cpu_cores was removed: - ( - sum by (%(alert_aggregation_labels)s, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_cpu_cores, - "deployment", "$1", "%(per_instance_label)s", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (%(alert_aggregation_labels)s, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="cpu"}, - "deployment", "$1", "%(per_instance_label)s", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - |||, - cpu_required_replicas_count: - ||| - # Jobs should be sized to their CPU usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - %(alert_aggregation_rule_prefix)s_deployment:actual_replicas:count - * - quantile_over_time(0.99, %(alert_aggregation_rule_prefix)s_deployment:container_cpu_usage_seconds_total:sum_rate[24h]) - / - %(alert_aggregation_rule_prefix)s_deployment:kube_pod_container_resource_requests_cpu_cores:sum - ) - |||, - memory_usage: - ||| - # Convenience rule to get the Memory utilization for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. - sum by (%(alert_aggregation_labels)s, deployment) ( - label_replace( - label_replace( - container_memory_usage_bytes{image!=""}, - "deployment", "$1", "%(per_instance_label)s", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - |||, - memory_requests: - ||| - # Convenience rule to get the Memory request for both a deployment and a statefulset. - # Multi-zone deployments are grouped together removing the "zone-X" suffix. 
- # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 - # that remove resource metrics, ref: - # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 - # - https://github.com/kubernetes/kube-state-metrics/pull/1004 - # - # This is the old expression, compatible with kube-state-metrics < v2.0.0, - # where kube_pod_container_resource_requests_memory_bytes was removed: - ( - sum by (%(alert_aggregation_labels)s, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests_memory_bytes, - "deployment", "$1", "%(per_instance_label)s", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - or - # This expression is compatible with kube-state-metrics >= v1.4.0, - # where kube_pod_container_resource_requests was introduced. - ( - sum by (%(alert_aggregation_labels)s, deployment) ( - label_replace( - label_replace( - kube_pod_container_resource_requests{resource="memory"}, - "deployment", "$1", "%(per_instance_label)s", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" - ), - # The question mark in "(.*?)" is used to make it non-greedy, otherwise it - # always matches everything and the (optional) zone is not removed. - "deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" - ) - ) - ) - |||, - memory_required_replicas_count: - ||| - # Jobs should be sized to their Memory usage. - # We do this by comparing 99th percentile usage over the last 24hrs to - # their current provisioned #replicas and resource requests. 
- ceil( - %(alert_aggregation_rule_prefix)s_deployment:actual_replicas:count - * - quantile_over_time(0.99, %(alert_aggregation_rule_prefix)s_deployment:container_memory_usage_bytes:sum[24h]) - / - %(alert_aggregation_rule_prefix)s_deployment:kube_pod_container_resource_requests_memory_bytes:sum - ) - |||, - }, - baremetal: { - actual_replicas_count: - ||| - sum by (%(alert_aggregation_labels)s, deployment) ( - label_replace( - cortex_build_info{namespace="baremetal"}, - "deployment", "$1", "job", "baremetal/(.*)" - ) - ) - |||, - cpu_usage_seconds_total: - ||| - sum by (%(alert_aggregation_labels)s, deployment) ( - irate( - label_replace( - process_cpu_seconds_total{namespace="baremetal"}, - "deployment", "$1", "job", "baremetal/(.*)" - )[5m:] - ) - ) - |||, - resource_requests_cpu_cores: - ||| - sum by (%(alert_aggregation_labels)s, deployment) ( - count without(cpu, mode) ( - label_replace( - node_cpu_seconds_total{mode="idle"}, - "deployment", "$1", "instance", ".*(%(all_instances)s).*" - ) - ) - ) - |||, - cpu_required_replicas_count: - ||| - ceil( - %(alert_aggregation_rule_prefix)s_deployment:actual_replicas:count - * - quantile_over_time(0.99, %(alert_aggregation_rule_prefix)s_deployment:container_cpu_usage_seconds_total:sum_rate[24h]) - / - %(alert_aggregation_rule_prefix)s_deployment:kube_pod_container_resource_requests_cpu_cores:sum - / - %(resource_threshold)s - ) - |||, - memory_usage: - ||| - sum by (%(alert_aggregation_labels)s, deployment) ( - label_replace( - process_resident_memory_bytes{namespace="baremetal"}, - "deployment", "$1", "job", "baremetal/(.*)" - ) - ) - |||, - memory_requests: - ||| - sum by (%(alert_aggregation_labels)s, deployment) ( - label_replace( - node_memory_MemTotal_bytes, - "deployment", "$1", "instance", ".*(%(all_instances)s).*" - ) - ) - |||, - memory_required_replicas_count: - ||| - ceil( - %(alert_aggregation_rule_prefix)s_deployment:actual_replicas:count - * - quantile_over_time(0.99, 
%(alert_aggregation_rule_prefix)s_deployment:container_memory_usage_bytes:sum[24h]) - / - %(alert_aggregation_rule_prefix)s_deployment:kube_pod_container_resource_requests_memory_bytes:sum - / - %(resource_threshold)s - ) - |||, - }, - }, - resources_panel_queries: { - kubernetes: { - cpu_usage: 'sum by(%(instanceLabel)s) (rate(container_cpu_usage_seconds_total{%(namespace)s,container=~"%(containerName)s"}[$__rate_interval]))', - cpu_limit: 'min(container_spec_cpu_quota{%(namespace)s,container=~"%(containerName)s"} / container_spec_cpu_period{%(namespace)s,container=~"%(containerName)s"})', - cpu_request: 'min(kube_pod_container_resource_requests{%(namespace)s,container=~"%(containerName)s",resource="cpu"})', - // We use "max" instead of "sum" otherwise during a rolling update of a statefulset we will end up - // summing the memory of the old instance/pod (whose metric will be stale for 5m) to the new instance/pod. - memory_working_usage: 'max by(%(instanceLabel)s) (container_memory_working_set_bytes{%(namespace)s,container=~"%(containerName)s"})', - memory_working_limit: 'min(container_spec_memory_limit_bytes{%(namespace)s,container=~"%(containerName)s"} > 0)', - memory_working_request: 'min(kube_pod_container_resource_requests{%(namespace)s,container=~"%(containerName)s",resource="memory"})', - // We use "max" instead of "sum" otherwise during a rolling update of a statefulset we will end up - // summing the memory of the old instance/pod (whose metric will be stale for 5m) to the new instance/pod. 
- memory_rss_usage: 'max by(%(instanceLabel)s) (container_memory_rss{%(namespace)s,container=~"%(containerName)s"})', - memory_rss_limit: 'min(container_spec_memory_limit_bytes{%(namespace)s,container=~"%(containerName)s"} > 0)', - memory_rss_request: 'min(kube_pod_container_resource_requests{%(namespace)s,container=~"%(containerName)s",resource="memory"})', - memory_go_heap_usage: 'sum by(%(instanceLabel)s) (go_memstats_heap_inuse_bytes{%(namespace)s,container=~"%(containerName)s"})', - network_receive_bytes: 'sum by(%(instanceLabel)s) (rate(container_network_receive_bytes_total{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval]))', - network_transmit_bytes: 'sum by(%(instanceLabel)s) (rate(container_network_transmit_bytes_total{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval]))', - disk_writes: - ||| - sum by(%(nodeLabel)s, %(instanceLabel)s, device) ( - rate( - node_disk_written_bytes_total[$__rate_interval] - ) - ) - + - %(filterNodeDiskContainer)s - |||, - disk_reads: - ||| - sum by(%(nodeLabel)s, %(instanceLabel)s, device) ( - rate( - node_disk_read_bytes_total[$__rate_interval] - ) - ) + %(filterNodeDiskContainer)s - |||, - disk_utilization: - ||| - max by(persistentvolumeclaim) ( - kubelet_volume_stats_used_bytes{%(namespaceMatcher)s} / - kubelet_volume_stats_capacity_bytes{%(namespaceMatcher)s} - ) - and - count by(persistentvolumeclaim) ( - kube_persistentvolumeclaim_labels{ - %(namespaceMatcher)s, - %(containerMatcher)s - } - ) - |||, - }, - baremetal: { - // Somes queries does not makes sense when running mimir on baremetal - // no need to define them - cpu_usage: 'sum by(%(instanceLabel)s) (rate(node_cpu_seconds_total{mode="user",%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval]))', - memory_working_usage: - ||| - node_memory_MemTotal_bytes{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"} - - node_memory_MemFree_bytes{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"} - 
- node_memory_Buffers_bytes{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"} - - node_memory_Cached_bytes{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"} - - node_memory_Slab_bytes{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"} - - node_memory_PageTables_bytes{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"} - - node_memory_SwapCached_bytes{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"} - |||, - // From cAdvisor code, the memory RSS is: - // The amount of anonymous and swap cache memory (includes transparent hugepages). - memory_rss_usage: - ||| - node_memory_Active_anon_bytes{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"} - + node_memory_SwapCached_bytes{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"} - |||, - memory_go_heap_usage: 'sum by(%(instanceLabel)s) (go_memstats_heap_inuse_bytes{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"})', - network_receive_bytes: 'sum by(%(instanceLabel)s) (rate(node_network_receive_bytes_total{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval]))', - network_transmit_bytes: 'sum by(%(instanceLabel)s) (rate(node_network_transmit_bytes_total{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval]))', - disk_writes: - ||| - sum by(%(nodeLabel)s, %(instanceLabel)s, device) ( - rate( - node_disk_written_bytes_total{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval] - ) - ) - |||, - disk_reads: - ||| - sum by(%(nodeLabel)s, %(instanceLabel)s, device) ( - rate( - node_disk_read_bytes_total{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval] - ) - ) - |||, - disk_utilization: - ||| - 1 - ((node_filesystem_avail_bytes{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s", mountpoint="%(instanceDataDir)s"}) - / node_filesystem_size_bytes{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s", mountpoint="%(instanceDataDir)s"}) - |||, - }, - }, - - // The label used to differentiate between different nodes 
(i.e. servers). - per_node_label: 'instance', - - // Whether certain dashboard description headers should be shown - show_dashboard_descriptions: { - writes: true, - reads: true, - tenants: true, - top_tenants: true, - }, - - // Whether autoscaling panels and alerts should be enabled for specific Mimir services. - autoscaling_hpa_prefix: 'keda-hpa-', - - autoscaling: { - query_frontend: { - enabled: false, - hpa_name: $._config.autoscaling_hpa_prefix + 'query-frontend', - }, - ruler_query_frontend: { - enabled: false, - hpa_name: $._config.autoscaling_hpa_prefix + 'ruler-query-frontend', - }, - querier: { - enabled: false, - // hpa_name can be a regexp to support multiple querier deployments, like "keda-hpa-querier(-burst(-backup)?)?". - hpa_name: $._config.autoscaling_hpa_prefix + 'querier', - }, - ruler_querier: { - enabled: false, - hpa_name: $._config.autoscaling_hpa_prefix + 'ruler-querier', - }, - distributor: { - enabled: false, - hpa_name: $._config.autoscaling_hpa_prefix + 'distributor', - }, - ruler: { - enabled: false, - hpa_name: $._config.autoscaling_hpa_prefix + 'ruler', - }, - gateway: { - enabled: false, - hpa_name: $._config.autoscaling_hpa_prefix + 'cortex-gw.*', - }, - }, - - - // The routes to exclude from alerts. - alert_excluded_routes: [ - 'debug_pprof', - ], - - // The default datasource used for dashboards. - dashboard_datasource: 'default', - datasource_regex: '', - - // Tunes histogram recording rules to aggregate over this interval. - // Set to at least twice the scrape interval; otherwise, recording rules will output no data. - // Set to four times the scrape interval to account for edge cases: https://www.robustperception.io/what-range-should-i-use-with-rate/ - recording_rules_range_interval: '1m', - - // Used to inject rows into dashboards at specific places that support it. - injectRows: {}, - - // Used to add additional services to dashboards that support it. 
- extraServiceNames: [], - - // When using early rejection of inflight requests in ingesters and distributors (using -ingester.limit-inflight-requests-using-grpc-method-limiter - // and -distributor.limit-inflight-requests-using-grpc-method-limiter options), rejected requests will not count towards standard Mimir metrics - // like cortex_request_duration_seconds_count. Enabling this will make them visible on the dashboard again. - // - // Disabled by default, because when -ingester.limit-inflight-requests-using-grpc-method-limiter and -distributor.limit-inflight-requests-using-grpc-method-limiter is - // not used (default), then rejected requests are already counted as failures. - show_rejected_requests_on_writes_dashboard: false, - }, -} \ No newline at end of file +import 'mimir-mixin/mixin.libsonnet' \ No newline at end of file diff --git a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet index d58b51ec..0bd0b339 100644 --- a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet +++ b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet @@ -109,7 +109,7 @@ value: datasource, }, hide: 0, - label: 'Data Source', + label: 'Data source', name: 'datasource', options: [], query: 'prometheus', diff --git a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/alerts.libsonnet b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/alerts.libsonnet index 753099cd..f222d4d9 100644 --- a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/alerts.libsonnet +++ b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/alerts/alerts.libsonnet @@ -312,6 +312,21 @@ local utils = 
import 'mixin-utils/utils.libsonnet'; message: '%(product)s ingester %(alert_instance_variable)s in %(alert_aggregation_variables)s has ingested samples with timestamps more than 1h in the future.' % $._config, }, }, + { + alert: $.alertName('StoreGatewayTooManyFailedOperations'), + 'for': '5m', + expr: ||| + sum by(%(alert_aggregation_labels)s, operation) (rate(thanos_objstore_bucket_operation_failures_total{component="store-gateway"}[1m])) > 0 + ||| % { + alert_aggregation_labels: $._config.alert_aggregation_labels, + }, + labels: { + severity: 'warning', + }, + annotations: { + message: '%(product)s store-gateway %(alert_instance_variable)s in %(alert_aggregation_variables)s is experiencing {{ $value | humanizePercentage }} errors while doing {{ $labels.operation }} on the object storage.' % $._config, + }, + }, ] + [ { alert: $.alertName('RingMembersMismatch'), diff --git a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet index 74a17c4e..63504168 100644 --- a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet +++ b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/config.libsonnet @@ -70,7 +70,7 @@ // docs/sources/mimir/manage/monitoring-grafana-mimir/requirements.md job_names: { ingester: ['ingester.*', 'cortex', 'mimir', 'mimir-write.*'], // Match also custom and per-zone ingester deployments. - distributor: ['distributor', 'cortex', 'mimir', 'mimir-write.*'], + distributor: ['distributor.*', 'cortex', 'mimir', 'mimir-write.*'], // Match also per-zone distributor deployments. querier: ['querier.*', 'cortex', 'mimir', 'mimir-read.*'], // Match also custom querier deployments. ruler_querier: ['ruler-querier.*'], // Match also custom querier deployments. 
ruler: ['ruler', 'cortex', 'mimir', 'mimir-backend.*'], @@ -78,15 +78,15 @@ ruler_query_frontend: ['ruler-query-frontend.*'], // Match also custom ruler-query-frontend deployments. query_scheduler: ['query-scheduler.*', 'mimir-backend.*'], // Not part of single-binary. Match also custom query-scheduler deployments. ruler_query_scheduler: ['ruler-query-scheduler.*'], // Not part of single-binary. Match also custom query-scheduler deployments. - ring_members: ['admin-api', 'alertmanager', 'compactor.*', 'distributor', 'ingester.*', 'querier.*', 'ruler', 'ruler-querier.*', 'store-gateway.*', 'cortex', 'mimir', 'mimir-write.*', 'mimir-read.*', 'mimir-backend.*'], + ring_members: ['admin-api', 'alertmanager', 'compactor.*', 'distributor.*', 'ingester.*', 'querier.*', 'ruler', 'ruler-querier.*', 'store-gateway.*', 'cortex', 'mimir', 'mimir-write.*', 'mimir-read.*', 'mimir-backend.*'], store_gateway: ['store-gateway.*', 'cortex', 'mimir', 'mimir-backend.*'], // Match also per-zone store-gateway deployments. - gateway: ['gateway', 'cortex-gw', 'cortex-gw-internal'], + gateway: ['gateway', 'cortex-gw.*'], // Match also custom and per-zone gateway deployments. compactor: ['compactor.*', 'cortex', 'mimir', 'mimir-backend.*'], // Match also custom compactor deployments. alertmanager: ['alertmanager', 'cortex', 'mimir', 'mimir-backend.*'], overrides_exporter: ['overrides-exporter', 'mimir-backend.*'], // The following are job matchers used to select all components in a given "path". 
- write: ['distributor', 'ingester.*', 'mimir-write.*'], + write: ['distributor.*', 'ingester.*', 'mimir-write.*'], read: ['query-frontend.*', 'querier.*', 'ruler-query-frontend.*', 'ruler-querier.*', 'mimir-read.*'], backend: ['ruler', 'query-scheduler.*', 'ruler-query-scheduler.*', 'store-gateway.*', 'compactor.*', 'alertmanager', 'overrides-exporter', 'mimir-backend.*'], }, diff --git a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet index 83a89156..e3e5afe3 100644 --- a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet +++ b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet @@ -1206,4 +1206,10 @@ local utils = import 'mixin-utils/utils.libsonnet'; replaceFields: replaceFields, }), + lokiMetricsQueryPanel(queries, legends='', unit='short'):: + super.queryPanel(queries, legends) + + { + datasource: '${lokidatasource}', + yaxes: $.yaxes(unit), + }, } diff --git a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/rollout-progress.libsonnet b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/rollout-progress.libsonnet index 69ba7f4d..b019afa8 100644 --- a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/rollout-progress.libsonnet +++ b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/rollout-progress.libsonnet @@ -382,7 +382,7 @@ local filename = 'mimir-rollout-progress.json'; // // Performance comparison with 24h ago // - $.panel('Latency vs 24h ago') + + $.timeseriesPanel('Latency vs 24h ago') + $.queryPanel([||| 1 - ( 
avg_over_time(histogram_quantile(0.99, sum by (le) (%(per_cluster_label)s_job_route:cortex_request_duration_seconds_bucket:sum_rate{%(write_job_matcher)s, route=~"%(write_http_routes_regex)s"} offset 24h))[1h:]) @@ -395,12 +395,18 @@ local filename = 'mimir-rollout-progress.json'; / avg_over_time(histogram_quantile(0.99, sum by (le) (%(per_cluster_label)s_job_route:cortex_request_duration_seconds_bucket:sum_rate{%(read_job_matcher)s, route=~"%(read_http_routes_regex)s"}))[1h:]) ) - ||| % config], ['writes', 'reads']) + { - yaxes: $.yaxes({ - format: 'percentunit', - min: null, // Can be negative. - }), - + ||| % config], ['writes', 'reads']) + + { + fieldConfig: { + defaults: { + unit: 'percentunit', + custom: { + fillOpacity: 10, + }, + }, + }, + } + + { id: 12, gridPos: { h: 8, w: 8, x: 16, y: 8 }, }, diff --git a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/slow-queries.libsonnet b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/slow-queries.libsonnet index 9f686950..466e1efd 100644 --- a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/slow-queries.libsonnet +++ b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/slow-queries.libsonnet @@ -5,10 +5,154 @@ local filename = 'mimir-slow-queries.json'; [filename]: ($.dashboard('Slow queries') + { uid: std.md5(filename) }) .addClusterSelectorTemplates(false) + .addRow( + $.row('Accross tenants') + .addPanel( + $.panel('Response time') + + $.lokiMetricsQueryPanel( + [ + 'quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + 'quantile_over_time(0.5, 
{%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + ], + ['p99', 'p50'], + unit='s', + ) + ) + .addPanel( + $.panel('Fetched series') + + $.lokiMetricsQueryPanel( + [ + 'quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + 'quantile_over_time(0.5, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + ], + ['p99', 'p50'], + ) + ) + .addPanel( + $.panel('Fetched chunks') + + $.lokiMetricsQueryPanel( + [ + 'quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + 'quantile_over_time(0.5, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + ], + ['p99', 'p50'], + unit='bytes', + ) + ) + .addPanel( + $.panel('Response size') + + $.lokiMetricsQueryPanel( + [ + 'quantile_over_time(0.99, 
{%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + 'quantile_over_time(0.5, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + ], + ['p99', 'p50'], + unit='bytes', + ) + ) + .addPanel( + $.panel('Time span') + + $.lokiMetricsQueryPanel( + [ + 'quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + 'quantile_over_time(0.5, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()' % [$._config.per_cluster_label, $._config.per_namespace_label], + ], + ['p99', 'p50'], + unit='s', + ) + ) + ) + .addRow( + $.row('Top 10 tenants') { collapse: true } + .addPanel( + $.panel('P99 response time') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user}}', + unit='s', + ) + ) + .addPanel( + 
$.panel('P99 fetched series') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user}}', + ) + ) + .addPanel( + $.panel('P99 fetched chunks') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user}}', + unit='bytes', + ) + ) + .addPanel( + $.panel('P99 response size') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user}}', + unit='bytes', + ) + ) + .addPanel( + $.panel('P99 time span') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user}}', + unit='s', + ) + ) + ) + .addRow( + ( + $.row('Top 10 User-Agents') { collapse: true } + .addPanel( + $.panel('P99 response time') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, 
{%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user_agent}}', + unit='s', + ) + ) + .addPanel( + $.panel('P99 fetched series') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user_agent}}', + ) + ) + .addPanel( + $.panel('P99 fetched chunks') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user_agent}}', + unit='bytes', + ) + ) + .addPanel( + $.panel('P99 response size') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user_agent}}', + unit='bytes', + ) + ) + .addPanel( + $.panel('P99 time span') + + $.lokiMetricsQueryPanel( + 'topk(10, quantile_over_time(0.99, {%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | 
user_agent=~"${user_agent}" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))' % [$._config.per_cluster_label, $._config.per_namespace_label], + '{{user_agent}}', + unit='s', + ) + ) + ) + ) .addRow( $.row('') .addPanel( { + height: '500px', title: 'Slow queries', type: 'table', datasource: '${lokidatasource}', @@ -17,7 +161,7 @@ local filename = 'mimir-slow-queries.json'; targets: [ { // Filter out the remote read endpoint. - expr: '{%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | response_time > ${min_duration}' % [$._config.per_cluster_label, $._config.per_namespace_label], + expr: '{%s=~"$cluster",%s=~"$namespace",name=~"query-frontend.*"} |= "query stats" != "/api/v1/read" | logfmt | user=~"${tenant_id}" | user_agent=~"${user_agent}" | response_time > ${min_duration}' % [$._config.per_cluster_label, $._config.per_namespace_label], instant: false, legendFormat: '', range: true, @@ -34,27 +178,11 @@ local filename = 'mimir-slow-queries.json'; source: 'labels', }, }, - { - // Compute the query time range. - id: 'calculateField', - options: { - alias: 'Time range', - mode: 'binary', - binary: { - left: 'param_end', - operator: '-', - reducer: 'sum', - right: 'param_start', - }, - reduce: { reducer: 'sum' }, - replaceFields: false, - }, - }, { id: 'organize', options: { // Hide fields we don't care. 
- local hiddenFields = ['caller', 'cluster', 'container', 'host', 'id', 'job', 'level', 'line', 'method', 'msg', 'name', 'namespace', 'param_end', 'param_start', 'param_time', 'path', 'pod', 'pod_template_hash', 'query_wall_time_seconds', 'stream', 'traceID', 'tsNs', 'labels', 'Line', 'Time'], + local hiddenFields = ['caller', 'cluster', 'container', 'host', 'id', 'job', 'level', 'line', 'method', 'msg', 'name', 'namespace', 'path', 'pod', 'pod_template_hash', 'query_wall_time_seconds', 'stream', 'traceID', 'tsNs', 'labels', 'Line', 'Time'], excludeByName: { [field]: true @@ -62,7 +190,7 @@ local filename = 'mimir-slow-queries.json'; }, // Order fields. - local orderedFields = ['ts', 'user', 'param_query', 'Time range', 'param_step', 'response_time'], + local orderedFields = ['ts', 'user', 'length', 'param_start', 'param_end', 'param_time', 'param_step', 'param_query', 'response_time'], indexByName: { [orderedFields[i]]: i @@ -170,6 +298,29 @@ local filename = 'mimir-slow-queries.json'; }, query: defaultValue, }, + // Add a variable to configure the tenant to filter on. + { + local defaultValue = '.*', + + type: 'textbox', + name: 'user_agent', + label: 'User-Agent HTTP Header', + hide: 0, + options: [ + { + selected: true, + text: defaultValue, + value: defaultValue, + }, + ], + current: { + // Default value. 
+ selected: true, + text: defaultValue, + value: defaultValue, + }, + query: defaultValue, + }, ], }, } + { diff --git a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/tenants.libsonnet b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/tenants.libsonnet index 3f3f8415..df9f6a30 100644 --- a/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/tenants.libsonnet +++ b/monitoring-mixins/mimir-mixin/vendor/github.com/grafana/mimir/operations/mimir-mixin/dashboards/tenants.libsonnet @@ -620,6 +620,58 @@ local filename = 'mimir-tenants.json'; ) ) + .addRow( + $.row('Alertmanager') + .addPanel( + $.panel('Alerts') + + $.queryPanel( + [ + 'sum by (user) (cortex_alertmanager_alerts{%(job)s, user="$user"})' % { job: $.jobMatcher($._config.job_names.alertmanager) }, + 'sum by (user) (cortex_alertmanager_silences{%(job)s, user="$user"})' % { job: $.jobMatcher($._config.job_names.alertmanager) }, + ], + ['alerts', 'silences'] + ) + ) + .addPanel( + $.panel('NPS') + + $.successFailurePanel( + ||| + ( + sum(rate(cortex_alertmanager_notifications_total{%(job)s, user="$user"}[$__rate_interval])) + - + on() (sum(rate(cortex_alertmanager_notifications_failed_total{%(job)s, user="$user"}[$__rate_interval])) or on () vector(0)) + ) > 0 + ||| % { + job: $.jobMatcher($._config.job_names.alertmanager), + }, + 'sum(rate(cortex_alertmanager_notifications_failed_total{%(job)s, user="$user"}[$__rate_interval]))' % { + job: $.jobMatcher($._config.job_names.alertmanager), + }, + ) + ) + .addPanel( + $.panel('NPS by integration') + + $.queryPanel( + [ + ||| + ( + sum(rate(cortex_alertmanager_notifications_total{%(job)s, user="$user"}[$__rate_interval])) by(integration) + - + (sum(rate(cortex_alertmanager_notifications_failed_total{%(job)s, user="$user"}[$__rate_interval])) by(integration) or + (sum(rate(cortex_alertmanager_notifications_total{%(job)s, 
user="$user"}[$__rate_interval])) by(integration) * 0) + )) > 0 + ||| % { + job: $.jobMatcher($._config.job_names.alertmanager), + }, + 'sum(rate(cortex_alertmanager_notifications_failed_total{%(job)s, user="$user"}[$__rate_interval])) by(integration)' % { + job: $.jobMatcher($._config.job_names.alertmanager), + }, + ], + ['success - {{ integration }}', 'failed - {{ integration }}'] + ) + ) + ) + .addRow( $.row('Read Path - Queries (User)') .addPanel( diff --git a/monitoring-mixins/tempo-mixin/deploy/alerts.yaml b/monitoring-mixins/tempo-mixin/deploy/alerts.yaml index 50a89a83..bb8bdfeb 100644 --- a/monitoring-mixins/tempo-mixin/deploy/alerts.yaml +++ b/monitoring-mixins/tempo-mixin/deploy/alerts.yaml @@ -16,7 +16,7 @@ groups: message: There are {{ printf "%f" $value }} unhealthy compactor(s). runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactorUnhealthy expr: | - max by (cluster, namespace) (tempo_ring_members{state="Unhealthy", name="tempo", namespace=~".*"}) > 0 + max by (cluster, namespace) (tempo_ring_members{state="Unhealthy", name="compactor", namespace=~".*"}) > 0 for: 15m labels: severity: critical @@ -25,7 +25,7 @@ groups: message: There are {{ printf "%f" $value }} unhealthy distributor(s). 
runbook_url: https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoDistributorUnhealthy expr: | - max by (cluster, namespace) (tempo_ring_members{state="Unhealthy", name="tempo", namespace=~".*"}) > 0 + max by (cluster, namespace) (tempo_ring_members{state="Unhealthy", name="distributor", namespace=~".*"}) > 0 for: 15m labels: severity: warning diff --git a/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-reads.json b/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-reads.json index 7d50c57e..9cf3b97e 100644 --- a/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-reads.json +++ b/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-reads.json @@ -68,7 +68,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -143,7 +143,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", 
"intervalFactor": 2, @@ -152,7 +152,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -161,7 +161,7 @@ "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (route)", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (route)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -260,7 +260,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": 
"time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -335,7 +335,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -344,7 +344,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -353,7 +353,7 @@ "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (route)", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (route)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -452,7 +452,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"querier_api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"querier_api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -527,7 +527,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -536,7 +536,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -545,7 +545,7 @@ "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route) * 1e3 / 
sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route)", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -644,7 +644,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -719,7 +719,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (le,endpoint)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (le,endpoint)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -728,7 +728,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\"}[$__rate_interval])) by (le,endpoint)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (le,endpoint)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -737,7 +737,7 @@ "step": 10 }, { - "expr": "sum(rate(tempo_querier_external_endpoint_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (endpoint) * 1e3 / sum(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (endpoint)", + "expr": "sum(rate(tempo_querier_external_endpoint_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (endpoint) * 1e3 / sum(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (endpoint)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -836,7 +836,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"/tempopb.Querier/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/tempopb.Querier/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -911,7 +911,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, 
sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -920,7 +920,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -929,7 +929,7 @@ "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route)", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1028,7 +1028,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -1103,7 +1103,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1112,7 +1112,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1121,7 +1121,7 @@ "step": 10 }, { - "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by () * 1e3 / 
sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1220,7 +1220,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -1295,7 +1295,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1304,7 +1304,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, 
sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1313,7 +1313,7 @@ "step": 10 }, { - "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1380,7 +1380,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-resources.json b/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-resources.json index 3a679664..0e81a264 100644 --- a/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-resources.json +++ b/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-resources.json @@ -69,21 +69,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) 
(rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -169,21 +169,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": 
"min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -258,7 +258,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -356,21 +356,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"distributor\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -456,21 +456,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -545,7 +545,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -643,21 +643,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -743,21 +743,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": 
"min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -832,7 +832,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -930,21 +930,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -1030,21 +1030,21 @@ 
"steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -1119,7 +1119,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/metrics-generator\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -1217,21 +1217,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { 
- "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -1317,21 +1317,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\", resource=\"memory\"} 
> 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -1406,7 +1406,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -1504,21 +1504,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -1604,21 +1604,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", 
+ "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -1693,7 +1693,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/querier\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -1791,21 +1791,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + 
"expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -1891,21 +1891,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -1980,7 +1980,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -2045,7 +2045,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-rollout-progress.json b/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-rollout-progress.json index 004e4908..2e0b3221 100644 --- a/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-rollout-progress.json +++ b/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-rollout-progress.json @@ -208,7 +208,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -320,7 +320,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", 
route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -428,7 +428,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -540,7 +540,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", 
route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))\n", + "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))\n", "format": null, "instant": false, "interval": "", @@ -644,7 +644,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -756,7 +756,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -864,7 +864,7 @@ "steppedLine": false, "targets": [ { - 
"expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -976,7 +976,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}))\n", + "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}))\n", "format": null, "instant": false, "interval": "", @@ -1251,14 +1251,14 @@ "steppedLine": false, "targets": [ { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))[1h:])\n)\n", + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"} offset 
24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))[1h:])\n)\n", "format": "time_series", "interval": "1m", "legendFormat": "writes", "legendLink": null }, { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}))[1h:])\n)\n", + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}))[1h:])\n)\n", "format": "time_series", "interval": "1m", "legendFormat": "reads", @@ -1317,7 +1317,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-tenants.json b/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-tenants.json index 45b3fe4e..b4e8926d 100644 --- a/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-tenants.json +++ b/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-tenants.json @@ -77,7 +77,7 @@ ], "targets": [ { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\"})\n or max by (cluster, namespace, limit_name) 
(tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})\n) by (limit_name)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})\n) by (limit_name)\n", "format": "table", "instant": true, "legendFormat": "", @@ -176,21 +176,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_distributor_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_distributor_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/distributor\",tenant=\"$tenant\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "received", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"ingestion_rate_limit_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"ingestion_rate_limit_bytes\"})\n) by (ingestion_rate_limit_bytes)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"ingestion_rate_limit_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"ingestion_rate_limit_bytes\"})\n) by (ingestion_rate_limit_bytes)\n", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"ingestion_burst_size_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"ingestion_burst_size_bytes\"})\n) by (ingestion_burst_size_bytes)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"ingestion_burst_size_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"ingestion_burst_size_bytes\"})\n) by (ingestion_burst_size_bytes)\n", "format": "time_series", "interval": "1m", "legendFormat": "burst limit", @@ -265,14 +265,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_distributor_spans_received_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_distributor_spans_received_total{cluster=~\"$cluster\", job=~\"($namespace)/distributor\",tenant=\"$tenant\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "accepted", "legendLink": null }, { - "expr": "sum(rate(tempo_discarded_spans_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"}[$__rate_interval])) by (reason)", + "expr": "sum(rate(tempo_discarded_spans_total{cluster=~\"$cluster\", job=~\"($namespace)/distributor\",tenant=\"$tenant\"}[$__rate_interval])) by (reason)", "format": "time_series", "interval": "1m", "legendFormat": "refused {{ reason }}", @@ -358,21 +358,21 @@ "steppedLine": false, "targets": [ { - "expr": "max(tempo_ingester_live_traces{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"})", + "expr": "max(tempo_ingester_live_traces{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",tenant=\"$tenant\"})", "format": "time_series", "interval": "1m", "legendFormat": "live traces", 
"legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"max_global_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"max_global_traces_per_user\"})\n) by (max_global_traces_per_user)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"max_global_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"max_global_traces_per_user\"})\n) by (max_global_traces_per_user)\n", "format": "time_series", "interval": "1m", "legendFormat": "global limit", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"max_local_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"max_local_traces_per_user\"})\n) by (max_local_traces_per_user)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"max_local_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"max_local_traces_per_user\"})\n) by (max_local_traces_per_user)\n", "format": "time_series", "interval": "1m", "legendFormat": "local limit", @@ -459,7 +459,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",tenant=\"$tenant\",op=\"traces\"}[$__rate_interval])) by (status)", + "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",tenant=\"$tenant\",op=\"traces\"}[$__rate_interval])) by (status)", "format": "time_series", "interval": "1m", "legendFormat": "{{ status }}", @@ -534,7 +534,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\",op=\"search\"}[$__rate_interval])) by (status)", + "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",tenant=\"$tenant\",op=\"search\"}[$__rate_interval])) by (status)", "format": "time_series", "interval": "1m", "legendFormat": "{{ status }}", @@ -615,7 +615,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"})", + "expr": "avg(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",tenant=\"$tenant\"})", "format": "time_series", "interval": "1m", "legendFormat": "length", @@ -684,7 +684,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(tempodb_compaction_outstanding_blocks{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"})\n/\ncount(tempo_build_info{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})\n", + "expr": "sum(tempodb_compaction_outstanding_blocks{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",tenant=\"$tenant\"})\n/\ncount(tempo_build_info{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})\n", "format": "time_series", "interval": "1m", "legendFormat": "blocks", @@ -765,7 +765,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_metrics_generator_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"}[$__rate_interval]))", + "expr": 
"sum(rate(tempo_metrics_generator_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/metrics-generator\",tenant=\"$tenant\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "rate", @@ -846,14 +846,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(tempo_metrics_generator_registry_active_series{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"})", + "expr": "sum(tempo_metrics_generator_registry_active_series{cluster=~\"$cluster\", job=~\"($namespace)/metrics-generator\",tenant=\"$tenant\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{ tenant }}", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"metrics_generator_max_active_series\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"metrics_generator_max_active_series\"})\n) by (metrics_generator_max_active_series)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"metrics_generator_max_active_series\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"metrics_generator_max_active_series\"})\n) by (metrics_generator_max_active_series)\n", "format": "time_series", "interval": "1m", "legendFormat": "limit", @@ -918,7 +918,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -987,7 +987,7 @@ "multi": false, "name": "tenant", "options": [ ], - "query": "label_values(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}, tenant)", + "query": 
"label_values(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"}, tenant)", "refresh": 1, "regex": "", "sort": 2, diff --git a/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-writes.json b/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-writes.json index 37d45cad..50c567ec 100644 --- a/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-writes.json +++ b/monitoring-mixins/tempo-mixin/deploy/dashboards_out/tempo-writes.json @@ -68,7 +68,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -143,7 +143,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -152,7 +152,7 @@ 
"step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -161,7 +161,7 @@ "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -250,7 +250,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (grpc_status) (\n rate(\n label_replace(\n {cluster=~\"$cluster\", job=~\"($namespace)/tempo\", __name__=~\"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_[0-9]+\"},\n \"grpc_status\", \"$1\", \"__name__\", \"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_(.+)\"\n )\n [$__interval:$__interval]\n )\n)\n", + "expr": "sum by (grpc_status) (\n rate(\n label_replace(\n 
{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", __name__=~\"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_[0-9]+\"},\n \"grpc_status\", \"$1\", \"__name__\", \"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_(.+)\"\n )\n [$__interval:$__interval]\n )\n)\n", "format": "time_series", "interval": "1m", "legendFormat": "{{grpc_status}}", @@ -357,14 +357,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_receiver_accepted_spans{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_receiver_accepted_spans{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "accepted", "legendLink": null }, { - "expr": "sum(rate(tempo_receiver_refused_spans{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_receiver_refused_spans{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "refused", @@ -439,7 +439,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -448,7 +448,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (le,)) * 
1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -457,7 +457,7 @@ "step": 10 }, { - "expr": "sum(rate(tempo_distributor_push_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_distributor_push_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_distributor_push_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_distributor_push_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -556,7 +556,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -631,7 +631,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -640,7 +640,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -649,7 +649,7 @@ "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -748,7 +748,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -823,7 +823,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -832,7 +832,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -841,7 +841,7 @@ "step": 10 }, { - "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -940,7 +940,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -1015,7 +1015,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1024,7 +1024,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", 
"intervalFactor": 2, @@ -1033,7 +1033,7 @@ "step": 10 }, { - "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1132,7 +1132,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -1207,7 +1207,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1216,7 +1216,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1225,7 +1225,7 @@ "step": 10 }, { - "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1324,7 +1324,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -1399,7 +1399,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1408,7 +1408,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1417,7 +1417,7 @@ "step": 10 }, { - "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1484,7 +1484,7 @@ "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/tempo-mixin/deploy/manifests/k8s-all-in-one.yaml b/monitoring-mixins/tempo-mixin/deploy/manifests/k8s-all-in-one.yaml index 9205e789..f2cc51b2 100644 --- a/monitoring-mixins/tempo-mixin/deploy/manifests/k8s-all-in-one.yaml +++ b/monitoring-mixins/tempo-mixin/deploy/manifests/k8s-all-in-one.yaml @@ -71,7 +71,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -146,7 +146,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -155,7 +155,7 @@ data: "step": 10 }, 
{ - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -164,7 +164,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (route)", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\",route=~\"api_.*\"}[$__rate_interval])) by (route)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -263,7 +263,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\", route=~\"api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": 
"{{status}}", @@ -338,7 +338,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -347,7 +347,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -356,7 +356,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"api_.*\"}[$__rate_interval])) by (route)", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",route=~\"api_.*\"}[$__rate_interval])) by (route)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -455,7 +455,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\", route=~\"querier_api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\", route=~\"querier_api_.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -530,7 +530,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -539,7 +539,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -548,7 +548,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route)", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",route=~\"querier_api_.*\"}[$__rate_interval])) by (route)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -647,7 +647,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -722,7 +722,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (le,endpoint)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (le,endpoint)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -731,7 +731,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by 
(le,endpoint)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_querier_external_endpoint_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (le,endpoint)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -740,7 +740,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_querier_external_endpoint_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (endpoint) * 1e3 / sum(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (endpoint)", + "expr": "sum(rate(tempo_querier_external_endpoint_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (endpoint) * 1e3 / sum(rate(tempo_querier_external_endpoint_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\"}[$__rate_interval])) by (endpoint)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -839,7 +839,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"/tempopb.Querier/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/tempopb.Querier/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -914,7 +914,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -923,7 +923,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (le,route)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -932,7 +932,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route)", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route) * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Querier/.*\"}[$__rate_interval])) by (route)", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1031,7 +1031,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval]),\n \"status\", 
\"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -1106,7 +1106,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1115,7 +1115,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1124,7 +1124,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",method=~\"Memcache.Get|Memcache.GetMulti\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1223,7 +1223,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -1298,7 +1298,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1307,7 +1307,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, 
sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1316,7 +1316,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=\"GET\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1383,7 +1383,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -1558,21 +1558,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / 
container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -1658,21 +1658,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"cortex-gw(-internal)?\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", 
"legendFormat": "request", @@ -1747,7 +1747,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -1845,21 +1845,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -1945,21 +1945,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": 
"sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -2034,7 +2034,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -2132,21 +2132,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -2232,21 +2232,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -2321,7 +2321,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) 
(go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -2419,21 +2419,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -2519,21 +2519,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"metrics-generator\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"metrics-generator\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -2608,7 +2608,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/metrics-generator\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -2706,21 +2706,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": 
"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -2806,21 +2806,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -2895,7 +2895,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) 
(go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -2993,21 +2993,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -3093,21 +3093,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", "format": 
"time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -3182,7 +3182,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/querier\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -3280,21 +3280,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / 
container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\", resource=\"cpu\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -3380,21 +3380,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"})", + "expr": "sum by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"tempo\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\", resource=\"memory\"} > 0)", "format": "time_series", "interval": "1m", "legendFormat": "request", @@ -3469,7 +3469,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})", + "expr": "sum by(instance) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
job=~\"($namespace)/compactor\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{instance}}", @@ -3534,7 +3534,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -3848,7 +3848,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -3960,7 +3960,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"4.+\"}[$__rate_interval])) 
/\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -4068,7 +4068,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -4180,7 +4180,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))\n", + "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))\n", "format": null, "instant": false, "interval": "", @@ -4284,7 +4284,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", 
route=~\"tempo_api_.*\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -4396,7 +4396,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -4504,7 +4504,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", + "expr": "sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", 
route=~\"tempo_api_.*\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}[$__rate_interval]))\n", "format": null, "instant": false, "interval": "", @@ -4616,7 +4616,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}))\n", + "expr": "histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}))\n", "format": null, "instant": false, "interval": "", @@ -4891,14 +4891,14 @@ data: "steppedLine": false, "targets": [ { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))[1h:])\n)\n", + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}))[1h:])\n)\n", "format": "time_series", "interval": "1m", "legendFormat": "writes", "legendLink": null }, { - "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) 
(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=~\"tempo_api_.*\"}))[1h:])\n)\n", + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=~\"tempo_api_.*\"}))[1h:])\n)\n", "format": "time_series", "interval": "1m", "legendFormat": "reads", @@ -4957,7 +4957,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -5140,7 +5140,7 @@ data: ], "targets": [ { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})\n) by (limit_name)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})\n) by (limit_name)\n", "format": "table", "instant": true, "legendFormat": "", @@ -5239,21 +5239,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_distributor_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_distributor_bytes_received_total{cluster=~\"$cluster\", 
job=~\"($namespace)/distributor\",tenant=\"$tenant\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "received", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"ingestion_rate_limit_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"ingestion_rate_limit_bytes\"})\n) by (ingestion_rate_limit_bytes)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"ingestion_rate_limit_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"ingestion_rate_limit_bytes\"})\n) by (ingestion_rate_limit_bytes)\n", "format": "time_series", "interval": "1m", "legendFormat": "limit", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"ingestion_burst_size_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"ingestion_burst_size_bytes\"})\n) by (ingestion_burst_size_bytes)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"ingestion_burst_size_bytes\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"ingestion_burst_size_bytes\"})\n) by (ingestion_burst_size_bytes)\n", "format": "time_series", "interval": "1m", "legendFormat": "burst limit", @@ -5328,14 +5328,14 @@ data: "steppedLine": false, "targets": 
[ { - "expr": "sum(rate(tempo_distributor_spans_received_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_distributor_spans_received_total{cluster=~\"$cluster\", job=~\"($namespace)/distributor\",tenant=\"$tenant\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "accepted", "legendLink": null }, { - "expr": "sum(rate(tempo_discarded_spans_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"}[$__rate_interval])) by (reason)", + "expr": "sum(rate(tempo_discarded_spans_total{cluster=~\"$cluster\", job=~\"($namespace)/distributor\",tenant=\"$tenant\"}[$__rate_interval])) by (reason)", "format": "time_series", "interval": "1m", "legendFormat": "refused {{ reason }}", @@ -5421,21 +5421,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "max(tempo_ingester_live_traces{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"})", + "expr": "max(tempo_ingester_live_traces{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",tenant=\"$tenant\"})", "format": "time_series", "interval": "1m", "legendFormat": "live traces", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"max_global_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"max_global_traces_per_user\"})\n) by (max_global_traces_per_user)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"max_global_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"max_global_traces_per_user\"})\n) by 
(max_global_traces_per_user)\n", "format": "time_series", "interval": "1m", "legendFormat": "global limit", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"max_local_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"max_local_traces_per_user\"})\n) by (max_local_traces_per_user)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"max_local_traces_per_user\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"max_local_traces_per_user\"})\n) by (max_local_traces_per_user)\n", "format": "time_series", "interval": "1m", "legendFormat": "local limit", @@ -5522,7 +5522,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\",op=\"traces\"}[$__rate_interval])) by (status)", + "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",tenant=\"$tenant\",op=\"traces\"}[$__rate_interval])) by (status)", "format": "time_series", "interval": "1m", "legendFormat": "{{ status }}", @@ -5597,7 +5597,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\",op=\"search\"}[$__rate_interval])) by (status)", + "expr": "sum(rate(tempo_query_frontend_queries_total{cluster=~\"$cluster\", job=~\"($namespace)/query-frontend\",tenant=\"$tenant\",op=\"search\"}[$__rate_interval])) by (status)", "format": "time_series", "interval": "1m", "legendFormat": "{{ status }}", @@ 
-5678,7 +5678,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "avg(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"})", + "expr": "avg(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",tenant=\"$tenant\"})", "format": "time_series", "interval": "1m", "legendFormat": "length", @@ -5747,7 +5747,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(tempodb_compaction_outstanding_blocks{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"})\n/\ncount(tempo_build_info{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"})\n", + "expr": "sum(tempodb_compaction_outstanding_blocks{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",tenant=\"$tenant\"})\n/\ncount(tempo_build_info{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"})\n", "format": "time_series", "interval": "1m", "legendFormat": "blocks", @@ -5828,7 +5828,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_metrics_generator_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_metrics_generator_bytes_received_total{cluster=~\"$cluster\", job=~\"($namespace)/metrics-generator\",tenant=\"$tenant\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "rate", @@ -5909,14 +5909,14 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(tempo_metrics_generator_registry_active_series{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",tenant=\"$tenant\"})", + "expr": "sum(tempo_metrics_generator_registry_active_series{cluster=~\"$cluster\", job=~\"($namespace)/metrics-generator\",tenant=\"$tenant\"})", "format": "time_series", "interval": "1m", "legendFormat": "{{ tenant }}", "legendLink": null }, { - "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",user=\"$tenant\",limit_name=\"metrics_generator_max_active_series\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",limit_name=\"metrics_generator_max_active_series\"})\n) by (metrics_generator_max_active_series)\n", + "expr": "max(\n max by (cluster, namespace, limit_name) (tempo_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",user=\"$tenant\",limit_name=\"metrics_generator_max_active_series\"})\n or max by (cluster, namespace, limit_name) (tempo_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",limit_name=\"metrics_generator_max_active_series\"})\n) by (metrics_generator_max_active_series)\n", "format": "time_series", "interval": "1m", "legendFormat": "limit", @@ -5981,7 +5981,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", @@ -6050,7 +6050,7 @@ data: "multi": false, "name": "tenant", "options": [ ], - "query": "label_values(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}, tenant)", + "query": "label_values(tempodb_blocklist_length{cluster=~\"$cluster\", job=~\"($namespace)/compactor\"}, tenant)", "refresh": 1, "regex": "", "sort": 2, @@ -6178,7 +6178,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", 
route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -6253,7 +6253,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -6262,7 +6262,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -6271,7 +6271,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by ()", + "expr": 
"sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", route=\"opentelemetry_proto_collector_trace_v1_traceservice_export\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -6360,7 +6360,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (grpc_status) (\n rate(\n label_replace(\n {cluster=~\"$cluster\", job=~\"($namespace)/tempo\", __name__=~\"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_[0-9]+\"},\n \"grpc_status\", \"$1\", \"__name__\", \"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_(.+)\"\n )\n [$__interval:$__interval]\n )\n)\n", + "expr": "sum by (grpc_status) (\n rate(\n label_replace(\n {cluster=~\"$cluster\", job=~\"($namespace)/cortex-gw(-internal)?\", __name__=~\"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_[0-9]+\"},\n \"grpc_status\", \"$1\", \"__name__\", \"envoy_cluster_grpc_proto_collector_trace_v1_TraceService_(.+)\"\n )\n [$__interval:$__interval]\n )\n)\n", "format": "time_series", "interval": "1m", "legendFormat": "{{grpc_status}}", @@ -6467,14 +6467,14 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempo_receiver_accepted_spans{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_receiver_accepted_spans{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "accepted", "legendLink": null }, { - "expr": "sum(rate(tempo_receiver_refused_spans{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval]))", + "expr": "sum(rate(tempo_receiver_refused_spans{cluster=~\"$cluster\", 
job=~\"($namespace)/distributor\"}[$__rate_interval]))", "format": "time_series", "interval": "1m", "legendFormat": "refused", @@ -6549,7 +6549,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -6558,7 +6558,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_distributor_push_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -6567,7 +6567,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_distributor_push_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_distributor_push_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_distributor_push_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_distributor_push_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/distributor\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -6666,7 +6666,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\", route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\", route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -6741,7 +6741,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -6750,7 +6750,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -6759,7 +6759,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",route=~\"/tempopb.Pusher/Push.*\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -6858,7 +6858,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -6933,7 +6933,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -6942,7 +6942,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", 
job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -6951,7 +6951,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -7050,7 +7050,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -7125,7 +7125,7 @@ data: "steppedLine": false, 
"targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -7134,7 +7134,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -7143,7 +7143,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -7242,7 +7242,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -7317,7 +7317,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -7326,7 +7326,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempo_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -7335,7 +7335,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / 
sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempo_memcache_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempo_memcache_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",method=\"Memcache.Put\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -7434,7 +7434,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "interval": "1m", "legendFormat": "{{status}}", @@ -7509,7 +7509,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -7518,7 +7518,7 @@ data: "step": 10 }, { - "expr": "histogram_quantile(0.50, 
sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -7527,7 +7527,7 @@ data: "step": 10 }, { - "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/tempo\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=~\"(PUT|POST)\"}[$__rate_interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -7594,7 +7594,7 @@ data: "value": "default" }, "hide": 0, - "label": "Data Source", + "label": "Data source", "name": "datasource", "options": [ ], "query": "prometheus", diff --git a/monitoring-mixins/tempo-mixin/jsonnetfile.lock.json b/monitoring-mixins/tempo-mixin/jsonnetfile.lock.json index 495e489c..a2cccc24 100644 --- a/monitoring-mixins/tempo-mixin/jsonnetfile.lock.json +++ b/monitoring-mixins/tempo-mixin/jsonnetfile.lock.json @@ -8,8 +8,8 @@ "subdir": "grafana-builder" } }, - "version": "eb731883044fc58f255d79c2a8d78a5854084e05", - "sum": "VmOxvg9FuY9UYr3lN6ZJe2HhuIErJoWimPybQr3S3yQ=" + "version": "02db06f540086fa3f67d487bd01e1b314853fb8f", + "sum": "B49EzIY2WZsFxNMJcgRxE/gcZ9ltnS8pkOOV6Q5qioc=" }, { "source": { @@ -18,7 +18,7 @@ 
"subdir": "mixin-utils" } }, - "version": "eb731883044fc58f255d79c2a8d78a5854084e05", + "version": "02db06f540086fa3f67d487bd01e1b314853fb8f", "sum": "PGf+vyCHqGxxS6SKNZiN3vR1xPnw6VOESXbeJrA5FaA=" }, { @@ -28,7 +28,7 @@ "subdir": "operations/tempo-mixin" } }, - "version": "2395ed534001db5ec8812c430401444db52ef21a", + "version": "3cc44fca03ba7d676dc77da6a18b8222546ede3c", "sum": "LWjLfpHs4osKk9KYVMrkBdLhCciu8eDA6T2Xh8aKIzk=" } ], diff --git a/monitoring-mixins/tempo-mixin/mixin.libsonnet b/monitoring-mixins/tempo-mixin/mixin.libsonnet index c3fbc682..6514728e 100644 --- a/monitoring-mixins/tempo-mixin/mixin.libsonnet +++ b/monitoring-mixins/tempo-mixin/mixin.libsonnet @@ -1,49 +1 @@ -local mixin = import 'tempo-mixin/mixin.libsonnet'; - -mixin { - local makePrefix(groups) = std.join('_', groups), - local makeGroupBy(groups) = std.join(', ', groups), - - _config+:: { - http_api_prefix: '', - namespace: '.*', - jobs: { - gateway: 'tempo', - query_frontend: 'tempo', - querier: 'tempo', - ingester: 'tempo', - metrics_generator: 'tempo', - distributor: 'tempo', - compactor: 'tempo', - }, - alerts: { - compactions_per_hour_failed: 2, - flushes_per_hour_failed: 2, - polls_per_hour_failed: 2, - user_configurable_overrides_polls_per_hour_failed: 5, - max_tenant_index_age_seconds: 600, - p99_request_threshold_seconds: 3, - p99_request_exclude_regex: 'metrics|/frontend.Frontend/Process|debug_pprof', - outstanding_blocks_warning: 100, - outstanding_blocks_critical: 250, - }, - - per_cluster_label: 'cluster', - namespace_selector_separator: '/', - - // Groups labels to uniquely identify and group by {jobs, clusters, tenants} - cluster_selectors: [$._config.per_cluster_label, 'namespace'], - job_selectors: [$._config.per_cluster_label, 'namespace', 'job'], - tenant_selectors: [$._config.per_cluster_label, 'namespace', 'tenant'], - - // Each group prefix is composed of `_`-separated labels - group_prefix_clusters: makePrefix($._config.cluster_selectors), - group_prefix_jobs: 
makePrefix($._config.job_selectors), - group_prefix_tenants: makePrefix($._config.tenant_selectors), - - // Each group-by label list is `, `-separated and unique identifies - group_by_cluster: makeGroupBy($._config.cluster_selectors), - group_by_job: makeGroupBy($._config.job_selectors), - group_by_tenant: makeGroupBy($._config.tenant_selectors), - }, -} +import 'tempo-mixin/mixin.libsonnet' \ No newline at end of file diff --git a/monitoring-mixins/tempo-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet b/monitoring-mixins/tempo-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet index d58b51ec..0bd0b339 100644 --- a/monitoring-mixins/tempo-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet +++ b/monitoring-mixins/tempo-mixin/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet @@ -109,7 +109,7 @@ value: datasource, }, hide: 0, - label: 'Data Source', + label: 'Data source', name: 'datasource', options: [], query: 'prometheus',